This RMD file contains all coursework questions. Disclaimer: Some graphs might not be well-scaled (especially grid plots)

Setting working directory and loading relevant libraries

setwd("~/Desktop/data_files/dataverse_files")
# install.packages("ggplot2")
# install.packages("tidyr")
# install.packages("ragg")
# install.packages("cowplot")
# install.packages("dplyr")
# install.packages("reshape2")
# install.packages("scales")
# install.packages("lubridate")
# install.packages("future")
# install.packages("mlr3")
# install.packages("mlr3verse")
# install.packages("ranger")
library(ggplot2)
library(tidyr)
library(ragg)
library(cowplot) # For grid plots
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2) # To melt and cast dataframes
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(scales) # For axis labels
library(lubridate) # To convert time
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
## 
##     stamp
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Libraries for machine learning
library(future)
library(mlr3verse)
## Loading required package: mlr3
library(mlr3learners)
library(mlr3pipelines)
library(mlr3tuning)
## Loading required package: paradox
library(mlr3viz)
library(paradox)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-3
library(ranger)

future::plan()
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL

Loading data files for the years 2005 to 2007 and binding them into the data frame years. Loading supplementary datasets: planes and airports.

# Reading the supplementary lookup tables (aircraft and airport details)
planes <- read.csv(
  "/Users/celestlee/Desktop/data_files/dataverse_files/plane-data.csv",
  header = TRUE)
airports <- read.csv(
  "/Users/celestlee/Desktop/data_files/dataverse_files/airports.csv",
  header = TRUE)

# Might take some time to load!
# Each compressed yearly file is read in turn and the three tables are stacked
# in year order into a single data frame
years <- do.call(rbind, lapply(
  c("/Users/celestlee/Desktop/data_files/dataverse_files/2005.csv.bz2",
    "/Users/celestlee/Desktop/data_files/dataverse_files/2006.csv.bz2",
    "/Users/celestlee/Desktop/data_files/dataverse_files/2007.csv.bz2"),
  read.csv))

Summary of data frame years

str(years)
## 'data.frame':    21735733 obs. of  29 variables:
##  $ Year             : int  2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
##  $ Month            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ DayofMonth       : int  28 29 30 31 2 3 4 5 6 7 ...
##  $ DayOfWeek        : int  5 6 7 1 7 1 2 3 4 5 ...
##  $ DepTime          : int  1603 1559 1603 1556 1934 2042 2046 NA 2110 1859 ...
##  $ CRSDepTime       : int  1605 1605 1610 1605 1900 1900 1900 1900 1900 1900 ...
##  $ ArrTime          : int  1741 1736 1741 1726 2235 9 2357 NA 8 2235 ...
##  $ CRSArrTime       : int  1759 1759 1805 1759 2232 2232 2232 2232 2223 2223 ...
##  $ UniqueCarrier    : chr  "UA" "UA" "UA" "UA" ...
##  $ FlightNum        : int  541 541 541 541 542 542 542 542 542 542 ...
##  $ TailNum          : chr  "N935UA" "N941UA" "N342UA" "N326UA" ...
##  $ ActualElapsedTime: int  158 157 158 150 121 147 131 NA 118 156 ...
##  $ CRSElapsedTime   : int  174 174 175 174 152 152 152 152 143 143 ...
##  $ AirTime          : int  131 136 131 129 106 97 100 NA 101 96 ...
##  $ ArrDelay         : int  -18 -23 -24 -33 3 97 85 NA 105 12 ...
##  $ DepDelay         : int  -2 -6 -7 -9 34 102 106 NA 130 -1 ...
##  $ Origin           : chr  "BOS" "BOS" "BOS" "BOS" ...
##  $ Dest             : chr  "ORD" "ORD" "ORD" "ORD" ...
##  $ Distance         : int  867 867 867 867 867 867 867 867 867 867 ...
##  $ TaxiIn           : int  4 6 9 11 5 3 5 0 2 4 ...
##  $ TaxiOut          : int  23 15 18 10 10 47 26 0 15 56 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ CancellationCode : chr  "" "" "" "" ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CarrierDelay     : int  0 0 0 0 0 23 46 0 16 0 ...
##  $ WeatherDelay     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NASDelay         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SecurityDelay    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ LateAircraftDelay: int  0 0 0 0 0 74 39 0 89 0 ...
summary(years)
##       Year          Month         DayofMonth      DayOfWeek    
##  Min.   :2005   Min.   : 1.00   Min.   : 1.00   Min.   :1.000  
##  1st Qu.:2005   1st Qu.: 4.00   1st Qu.: 8.00   1st Qu.:2.000  
##  Median :2006   Median : 7.00   Median :16.00   Median :4.000  
##  Mean   :2006   Mean   : 6.52   Mean   :15.73   Mean   :3.943  
##  3rd Qu.:2007   3rd Qu.: 9.00   3rd Qu.:23.00   3rd Qu.:6.000  
##  Max.   :2007   Max.   :12.00   Max.   :31.00   Max.   :7.000  
##                                                                
##     DepTime         CRSDepTime      ArrTime         CRSArrTime  
##  Min.   :   1     Min.   :   0   Min.   :   1     Min.   :   0  
##  1st Qu.: 931     1st Qu.: 930   1st Qu.:1110     1st Qu.:1116  
##  Median :1330     Median :1325   Median :1516     Median :1520  
##  Mean   :1341     Mean   :1334   Mean   :1487     Mean   :1497  
##  3rd Qu.:1733     3rd Qu.:1721   3rd Qu.:1912     3rd Qu.:1909  
##  Max.   :2930     Max.   :2359   Max.   :2955     Max.   :2400  
##  NA's   :416412                  NA's   :463805                 
##  UniqueCarrier        FlightNum      TailNum          ActualElapsedTime
##  Length:21735733    Min.   :   1   Length:21735733    Min.   : -66.0   
##  Class :character   1st Qu.: 586   Class :character   1st Qu.:  75.0   
##  Mode  :character   Median :1481   Mode  :character   Median : 107.0   
##                     Mean   :2140                      Mean   : 125.7   
##                     3rd Qu.:3364                      3rd Qu.: 156.0   
##                     Max.   :9619                      Max.   :1879.0   
##                                                       NA's   :463805   
##  CRSElapsedTime       AirTime           ArrDelay         DepDelay      
##  Min.   :-1240.0   Min.   :-1428.0   Min.   :-939.0   Min.   :-1200.0  
##  1st Qu.:   76.0   1st Qu.:   54.0   1st Qu.:  -9.0   1st Qu.:   -4.0  
##  Median :  108.0   Median :   84.0   Median :  -1.0   Median :    0.0  
##  Mean   :  126.8   Mean   :  102.3   Mean   :   8.7   Mean   :   10.1  
##  3rd Qu.:  156.0   3rd Qu.:  131.0   3rd Qu.:  13.0   3rd Qu.:    9.0  
##  Max.   : 1430.0   Max.   : 1958.0   Max.   :2598.0   Max.   : 2601.0  
##  NA's   :998       NA's   :463805    NA's   :463805   NA's   :416412   
##     Origin              Dest              Distance          TaxiIn        
##  Length:21735733    Length:21735733    Min.   :  11.0   Min.   :   0.000  
##  Class :character   Class :character   1st Qu.: 317.0   1st Qu.:   4.000  
##  Mode  :character   Mode  :character   Median : 569.0   Median :   5.000  
##                                        Mean   : 723.8   Mean   :   7.101  
##                                        3rd Qu.: 950.0   3rd Qu.:   8.000  
##                                        Max.   :4962.0   Max.   :1523.000  
##                                                                           
##     TaxiOut          Cancelled       CancellationCode      Diverted      
##  Min.   :   0.00   Min.   :0.00000   Length:21735733    Min.   :0.00000  
##  1st Qu.:  10.00   1st Qu.:0.00000   Class :character   1st Qu.:0.00000  
##  Median :  13.00   Median :0.00000   Mode  :character   Median :0.00000  
##  Mean   :  15.83   Mean   :0.01916                      Mean   :0.00218  
##  3rd Qu.:  19.00   3rd Qu.:0.00000                      3rd Qu.:0.00000  
##  Max.   :1339.00   Max.   :1.00000                      Max.   :1.00000  
##                                                                          
##   CarrierDelay       WeatherDelay          NASDelay        SecurityDelay     
##  Min.   :   0.000   Min.   :   0.0000   Min.   : -49.000   Min.   :  0.0000  
##  1st Qu.:   0.000   1st Qu.:   0.0000   1st Qu.:   0.000   1st Qu.:  0.0000  
##  Median :   0.000   Median :   0.0000   Median :   0.000   Median :  0.0000  
##  Mean   :   3.428   Mean   :   0.7042   Mean   :   3.582   Mean   :  0.0248  
##  3rd Qu.:   0.000   3rd Qu.:   0.0000   3rd Qu.:   0.000   3rd Qu.:  0.0000  
##  Max.   :2580.000   Max.   :1510.0000   Max.   :1392.000   Max.   :382.0000  
##                                                                              
##  LateAircraftDelay 
##  Min.   :   0.000  
##  1st Qu.:   0.000  
##  Median :   0.000  
##  Mean   :   4.436  
##  3rd Qu.:   0.000  
##  Max.   :1366.000  
## 

1. When is the best time of day, day of the week, and time of year to fly to minimise delays?

Data wrangling for years data frame and creating variable status

Setting Month, Year, DayOfWeek and DayofMonth as factors in data frame years

# Convert the four calendar columns to factors in one pass so they are treated
# as discrete categories when grouping and plotting
years <- years %>%
  mutate(across(c(Month, Year, DayOfWeek, DayofMonth), as.factor))

Adding new column status to years for flight status

# Label every flight with a single status. When several conditions hold the
# first matching rule wins: Cancelled, then Delayed (departure delay above
# zero), then Diverted. A flight is "On Time" only when it is neither diverted
# nor cancelled and its departure delay is zero or negative. Rows matching no
# rule are left as NA.
years$status <- as.factor(case_when(
  years$Cancelled == 1 ~ "Cancelled",
  years$DepDelay > 0 ~ "Delayed",
  years$Diverted == 1 ~ "Diverted",
  years$Diverted != 1 & years$DepDelay <= 0 & years$Cancelled != 1 ~ "On Time"))

Summary of column status in data frame years

summary(years$status)
## Cancelled   Delayed  Diverted   On Time 
##    416412   8508330     21996  12788995

Bar plot of status of flights in percentage

# Creating data frame `status_perc` with `perc` as percentages of `status`
status_perc <- years %>%
  count(status) %>%
  mutate(perc = n / nrow(years) * 100)

# Bar plot
# The y-axis labels are built from whatever breaks ggplot chooses, so the plot
# no longer depends on a hand-written label vector having exactly as many
# entries as there are breaks.
status_perc %>%
  ggplot(aes(x = status, y = perc)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  scale_y_continuous(labels = function(breaks) paste0(breaks, "%")) +
  geom_text(aes(label = paste0(round(perc, 2), "%")),
            position = position_dodge(width = 0.9), vjust = -0.4) +
  labs(title = "Percentage of flight status between 2005 to 2007",
       x = "Flight Status", y = "Percentage") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Delay rate by month (Line plot)

# Tick labels for the twelve levels of `Month` (three-letter English names)
month_label <- month.abb

# `month_delay`: for each calendar month, the number of flights whose `status`
# is "Delayed", the number of flights, and the ratio of the two
month_delay <- years %>%
  group_by(Month) %>%
  summarize(num_delays = sum(status == "Delayed"),
            num_flights = n(),
            delay_rate = num_delays / num_flights)

# Line plot of **delay rate by month**; `group = 1` joins the discrete months
# into a single line
plot_month <- ggplot(month_delay, aes(Month, delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  scale_x_discrete(labels = month_label) +
  labs(title = "Month", y = "Delay Rate") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12))
plot_month

Delay rate by day of month (Line plot)

# `day_delay`: for each day of the month, the number of flights whose `status`
# is "Delayed", the number of flights, and the ratio of the two
day_delay <- years %>%
  group_by(DayofMonth) %>%
  summarize(num_delays = sum(status == "Delayed"),
            num_flights = n(),
            delay_rate = num_delays / num_flights)

# Line plot of **delay rate by day of month**; `group = 1` joins the discrete
# days into a single line
plot_day <- ggplot(day_delay, aes(DayofMonth, delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  labs(title = "Day of Month", y = "Delay Rate") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12))
plot_day

Delay rate by day of week (Line plot)

# Tick labels for the seven levels of `DayOfWeek`
week_label <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

# `week_delay`: for each day of the week, the number of flights whose `status`
# is "Delayed", the number of flights, and the ratio of the two
week_delay <- years %>%
  group_by(DayOfWeek) %>%
  summarize(num_delays = sum(status == "Delayed"),
            num_flights = n(),
            delay_rate = num_delays / num_flights)

# Line plot of **delay rate by day of week**; `group = 1` joins the discrete
# days into a single line
plot_week <- ggplot(week_delay, aes(DayOfWeek, delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  scale_x_discrete(labels = week_label) +
  labs(title = "Day of Week", y = "Delay Rate") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12))
plot_week

Preparing data for delay rate by hour (Part 1) – Plot is for own reference

# Creating data frame `all_hours` to categorise all `DepTime` as "Normal" or "Unusual"
# A departure time is "Unusual" when it is written with one or two digits or
# when it is greater than 2400; rows without a departure time are dropped first.
all_hours <- years %>%
  select(DepTime, status) %>%
  drop_na(DepTime) %>%
  mutate(dep_hour = ifelse(
    nchar(DepTime) %in% c(1, 2) | DepTime > 2400,
    "Unusual", "Normal"))

# Calculating % of "Unusual" DepTime
perc_hour <- all_hours %>%
  count(dep_hour) %>%
  mutate(hour_perc = n / nrow(all_hours) * 100)

# Bar plot of % of Normal vs Unusual timings
# The y-axis labels are built from whatever breaks ggplot chooses, so the plot
# no longer depends on a hand-written label vector having exactly as many
# entries as there are breaks.
perc_hour %>%
  ggplot(aes(x = dep_hour, y = hour_perc)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  scale_y_continuous(labels = function(breaks) paste0(breaks, "%")) +
  geom_text(aes(label = paste0(round(hour_perc, 2), "%")),
            position = position_dodge(width = 0.9), vjust = -0.4) +
  labs(title = "Percentage of Normal vs Unusual Departure Timings", x = "", y = "Percentage") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Since only 0.33% of the departure times in the variable DepTime are categorised as “Unusual”, omitting these values will neither skew nor materially affect the result.

Preparing data for delay rate by hour (Part 2)

# Logical Data cleansing
# Bin every recorded departure time (written as hhmm) into the clock hour it
# falls in. The hour is obtained by integer division, so e.g. 1759 is counted
# under "1700"; the previous signif()-based rounding moved hh:50-hh:59 into the
# following hour. Hours of 24 and above wrap around midnight (2930 -> "0500"),
# and times from 0001 to 0059 as well as 2400 are counted under "0000".
# Rows without a departure time are dropped.
time_delay <- years %>%
  select(DepTime, status) %>%
  drop_na(DepTime) %>%
  mutate(DepTime = factor(
    sprintf("%02d00", (DepTime %/% 100L) %% 24L),
    levels = sprintf("%02d00", 0:23)))

# Number of flights in each hour bin
summary(time_delay$DepTime)
## (hourly counts depend on the binning above; re-knit to regenerate this table)

Delay rate by hour (Line plot)

# Collapse `time_delay` (one row per flight) to one row per departure-hour bin:
# number of delayed flights, number of flights, and the ratio of the two.
# The result overwrites `time_delay`.
time_delay <- time_delay %>%
  group_by(DepTime) %>%
  summarize(
    num_delays = sum(status == "Delayed"),
    num_flights = n(),
    delay_rate = num_delays / num_flights)

# Line plot of **delay rate by hour**; the hour labels are rotated so they do
# not overlap
plot_hour <- ggplot(time_delay, aes(DepTime, delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  labs(title = "Hour", y = "Delay Rate") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        axis.title.x = element_blank())
plot_hour

Grid of all plots using cowplot package

# Arrange the four delay-rate plots in a grid under a shared title
p <- plot_grid(plot_month, plot_day, plot_week, plot_hour, scale = 1)
title <- ggdraw() + draw_label("Delay Rates Based On", fontface = "bold", size = 17)
plot_grid(title, p, ncol = 1, rel_heights = c(0.1, 1))

# Removing data frames from Question 1
# The per-flight intermediates (`all_hours`, `time_delay2`, `time_delay3`) are
# released as well, since each holds millions of rows. Only objects that
# actually exist are removed, so no warning is raised if one was never created.
rm(list = intersect(
  c("day_delay", "month_delay", "time_delay", "week_delay",
    "all_hours", "time_delay2", "time_delay3"),
  ls()))

2. Do older planes suffer more delays?

Preparation of data frame carrier

# Creating data frame `carrier`: the delay columns of every flight, with the
# tail number renamed to `tailnum` so it matches the key used in `planes`
carrier <- years %>%
  select(UniqueCarrier, tailnum = TailNum, DepDelay, ArrDelay, Year,
         CarrierDelay, WeatherDelay, NASDelay, SecurityDelay, LateAircraftDelay)

# Removing NA values from `planes` dataset and naming it `plane_year`
# Values of `year` that cannot be read as a number become NA (hence the
# coercion warning) and those rows are dropped from `planes`.
planes$year <- as.numeric(planes$year)
## Warning: NAs introduced by coercion
planes <- subset(planes, !is.na(year))
summary(planes$year)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    1992    1999    1996    2002    2008
# A year of 0 is excluded too; the remaining year is stored as `planeyear`
plane_year <- planes %>%
  filter(year != 0) %>%
  select(tailnum, planeyear = year)

# Joining `plane_year` and `carrier` using inner join
# The key is named explicitly so the join cannot silently pick up other shared
# column names.
carrier <- carrier %>%
  inner_join(plane_year, by = "tailnum")
# `Year` is a factor at this point; go through character to recover the number
carrier$Year <- as.integer(as.character(carrier$Year))
Summary of data frame carrier
summary(carrier)
##  UniqueCarrier        tailnum             DepDelay           ArrDelay      
##  Length:16633761    Length:16633761    Min.   :-1200.00   Min.   :-692.00  
##  Class :character   Class :character   1st Qu.:   -4.00   1st Qu.:  -9.00  
##  Mode  :character   Mode  :character   Median :    0.00   Median :  -1.00  
##                                        Mean   :   10.16   Mean   :   9.05  
##                                        3rd Qu.:    9.00   3rd Qu.:  13.00  
##                                        Max.   : 2601.00   Max.   :2598.00  
##                                        NA's   :109765     NA's   :147012   
##       Year       CarrierDelay       WeatherDelay          NASDelay       
##  Min.   :2005   Min.   :   0.000   Min.   :   0.0000   Min.   : -49.000  
##  1st Qu.:2005   1st Qu.:   0.000   1st Qu.:   0.0000   1st Qu.:   0.000  
##  Median :2006   Median :   0.000   Median :   0.0000   Median :   0.000  
##  Mean   :2006   Mean   :   3.558   Mean   :   0.7186   Mean   :   3.806  
##  3rd Qu.:2007   3rd Qu.:   0.000   3rd Qu.:   0.0000   3rd Qu.:   0.000  
##  Max.   :2007   Max.   :2580.000   Max.   :1510.0000   Max.   :1392.000  
##                                                                          
##  SecurityDelay      LateAircraftDelay   planeyear   
##  Min.   :  0.0000   Min.   :   0.00   Min.   :1956  
##  1st Qu.:  0.0000   1st Qu.:   0.00   1st Qu.:1992  
##  Median :  0.0000   Median :   0.00   Median :1999  
##  Mean   :  0.0255   Mean   :   4.53   Mean   :1997  
##  3rd Qu.:  0.0000   3rd Qu.:   0.00   3rd Qu.:2002  
##  Max.   :366.0000   Max.   :1366.00   Max.   :2007  
## 
str(carrier)
## 'data.frame':    16633761 obs. of  11 variables:
##  $ UniqueCarrier    : chr  "UA" "UA" "UA" "UA" ...
##  $ tailnum          : chr  "N935UA" "N941UA" "N342UA" "N326UA" ...
##  $ DepDelay         : int  -2 -6 -7 -9 34 102 106 -1 -1 17 ...
##  $ ArrDelay         : int  -18 -23 -24 -33 3 97 85 12 -18 17 ...
##  $ Year             : int  2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
##  $ CarrierDelay     : int  0 0 0 0 0 23 46 0 0 17 ...
##  $ WeatherDelay     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NASDelay         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SecurityDelay    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ LateAircraftDelay: int  0 0 0 0 0 74 39 0 0 0 ...
##  $ planeyear        : num  1992 1992 1988 1988 1990 ...

Histogram of year of manufacture of planes from plane_year

# Histogram of `planeyear` with one bin per year and a dotted reference line
# at 1980
hist <- ggplot(plane_year, aes(planeyear)) +
  geom_histogram(binwidth = 1, fill = "steelblue") +
  geom_vline(xintercept = 1980, linetype = "dotted") +
  labs(title = "Year of Manufacture", y = "Frequency") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"),
        legend.position = "none")

Line charts for mean departure and arrival delays

# Preparing data for line charts
# `age` is the flight year minus the aircraft's `planeyear`; for every age the
# mean departure and mean arrival delay are taken over flights, ignoring NAs
plane_age <- carrier %>%
  group_by(age = Year - planeyear) %>%
  summarise(
    mean_depdelay = mean(DepDelay, na.rm = TRUE),
    mean_arrdelay = mean(ArrDelay, na.rm = TRUE))

summary(plane_age$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -2.00   11.25   24.50   24.50   37.75   51.00
# Keep strictly positive ages only: this drops the negative values and also
# age 0
plane_age <- filter(plane_age, age > 0)
# Mean departure delay against aircraft age (all planes), with a dotted
# reference line at 25 years
p1 <- ggplot(plane_age, aes(age, mean_depdelay, group = 1)) +
  geom_line(color = "blue") +
  geom_vline(xintercept = 25, linetype = "dotted") +
  labs(x = "Years of service", y = "Mean Departure Delay (in minutes)") +
  theme_classic()

# Mean arrival delay against aircraft age (all planes), same reference line
p2 <- ggplot(plane_age, aes(age, mean_arrdelay, group = 1)) +
  geom_line(color = "blue") +
  geom_vline(xintercept = 25, linetype = "dotted") +
  labs(x = "Years of service", y = "Mean Arrival Delay (in minutes)") +
  theme_classic()

# Grid of all plots using `cowplot` package: histogram on top, the two line
# charts side by side underneath
bottom_row1 <- plot_grid(p1, p2)
plot_grid(hist, bottom_row1, nrow = 2)

Line charts of mean departure and arrival delays based on year of make

# Pre 1980 (Older) Line chart
# Rows of `plane_age` with age above 25, with the two mean-delay columns given
# display names and stacked into long form (`variable`, `value`) for plotting
older_planes <- plane_age %>%
  filter(age > 25) %>%
  rename("Mean Departure Delay" = mean_depdelay,
         "Mean Arrival Delay" = mean_arrdelay) %>%
  melt(id.vars = "age")

# Line colours, in the order of the levels of `variable`
plot_cols <- c("#e48f1b", "#619ed6")

p3 <- ggplot(older_planes, aes(age, value)) +
  geom_line(aes(color = variable)) +
  scale_color_manual(values = plot_cols) +
  ylim(2, 15) +
  labs(title = "Planes manufactured before the 1980s",
       x = "Years of service", y = "Mean Delay (in minutes)") +
  theme_bw() +
  theme(legend.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p3

# Post 1980 (Younger) Line chart
# Ages up to and including 25 belong here. The older-planes chart uses
# `age > 25`, so filtering on `age < 25` left planes aged exactly 25 out of
# both charts; `<=` also matches the grouping used for the delay-factor bars.
younger_planes <- plane_age %>%
  filter(age <= 25)
# Display names for the two mean-delay columns, then stack them into long form
colnames(younger_planes) <- c("age", "Mean Departure Delay", "Mean Arrival Delay")
younger_planes <- melt(younger_planes, id.vars = "age")

p4 <- ggplot(younger_planes, aes(x = age, y = value)) +
  geom_line(aes(color = variable)) +
  labs(title = "Planes manufactured after the 1980s",
       x = "Years of service", y = "Mean Delay (in minutes)") +
  scale_color_manual(values = plot_cols) +
  ylim(2, 15) +
  theme_bw() +
  theme(legend.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p4

# Grid plot of both line charts
plot_grid(p3, p4)

Grouped bar chart of delay factors

# Creating new data frame with mean values of all delay factors
# Flights are split into two age groups and each delay cause is averaged over
# the flights in the group. The earlier version summed the per-age means inside
# each group, which is not a mean and grows with the number of distinct ages in
# the group, so the two bars were not comparable.
delay_factors <- carrier %>%
  mutate(age = Year - planeyear) %>%
  filter(age > 0) %>%
  mutate(group = ifelse(
    age > 25, "Over 25 Years", "Below 25 Years")) %>%
  group_by(group) %>%
  summarise(
    `Carrier Delay` = mean(CarrierDelay, na.rm = TRUE),
    `Weather Delay` = mean(WeatherDelay, na.rm = TRUE),
    `NAS Delay` = mean(NASDelay, na.rm = TRUE),
    `Security Delay` = mean(SecurityDelay, na.rm = TRUE),
    `Late Aircraft Delay` = mean(LateAircraftDelay, na.rm = TRUE))

# Data preparation: long form with columns `group`, `variable`, `value`
delay_factors <- melt(delay_factors, id.vars = "group")

# The bars are coloured through the `fill` aesthetic, so the manual palette has
# to be attached to the fill scale (a colour scale has no effect here)
grouped_bar <- delay_factors %>%
  ggplot(aes(fill = group, y = value, x = variable)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.7) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = plot_cols) +
  labs(title = "Mean Delay based on Delay Factors ", x = "", y = "Mean Delay (minutes)") +
  theme_bw() +
  theme(legend.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
grouped_bar

3 by 1 Line chart (Top 3 delay factors)

# Per aircraft age (strictly positive ages only), the mean carrier, NAS and
# late-aircraft delay over flights, ignoring NAs
delay_lines <- carrier %>%
  mutate(age = Year - planeyear) %>%
  filter(age > 0) %>%
  group_by(age) %>%
  summarise(
    mean_carrierdelay = mean(CarrierDelay, na.rm = TRUE),
    mean_NASdelay = mean(NASDelay, na.rm = TRUE),
    mean_aircraftdelay = mean(LateAircraftDelay, na.rm = TRUE))

# 3x1 Line chart: one panel per delay factor, all on the same 0-7 y-range
p5 <- ggplot(delay_lines, aes(age, mean_carrierdelay, group = 1)) +
  geom_line(color = "blue") +
  scale_y_continuous(limits = c(0, 7)) +
  labs(x = "Years of service", y = "Mean Carrier Delay (in minutes)") +
  theme_classic()
p5

p6 <- ggplot(delay_lines, aes(age, mean_NASdelay, group = 1)) +
  geom_line(color = "blue") +
  scale_y_continuous(limits = c(0, 7)) +
  labs(x = "Years of service", y = "Mean NAS Delay (in minutes)") +
  theme_classic()
p6

p7 <- ggplot(delay_lines, aes(age, mean_aircraftdelay, group = 1)) +
  geom_line(color = "blue") +
  scale_y_continuous(limits = c(0, 7)) +
  labs(x = "Years of service", y = "Mean Late Aircraft Delay (in minutes)") +
  theme_classic()
p7

# Grid plot of grouped bar chart + 3x1 Line chart
bottom_row2 <- plot_grid(p5, p6, p7, nrow = 1)
plot_grid(grouped_bar, bottom_row2, ncol = 1)

# Removing data frames from Question 2
rm(carrier, plane_age, younger_planes, delay_factors, delay_lines)

3. How does the number of people flying between different locations change over time?

Creating Origin-Destination pairs in data frame od_pairs

# Origin-Dest pairs
# Number of flights for every (Year, Origin, Dest) combination, busiest first
od_pairs <- years %>%
  group_by(Year, Origin, Dest) %>%
  summarize(num_trips = n()) %>%
  arrange(-num_trips)
## `summarise()` has grouped output by 'Year', 'Origin'. You can override using
## the `.groups` argument.
# `combi` is the route written as "Origin/Dest", stored as a factor
od_pairs$combi <- factor(paste(od_pairs$Origin, od_pairs$Dest, sep = "/"))
od_pairs$Year <- as.factor(od_pairs$Year)

# New data frame `sum_od` based on total number of trips in each combination
# One row per O-D pair with its total over all years, keeping the 10 busiest.
# (Keeping one row per year and taking the first 30 rows made the bar chart
# stack the same total once per year, and only gave 10 pairs when every pair
# appeared in all three years.)
sum_od <- od_pairs %>%
  group_by(combi) %>%
  summarise(sum_trips = sum(num_trips), .groups = "drop") %>%
  arrange(desc(sum_trips)) %>%
  head(n = 10) # the top 10 OD pairs over the 3 years

(For own reference) O-D pairs with most traffic in 3 years

# Horizontal bar chart of `sum_trips` for each O-D pair in `sum_od`
ggplot(sum_od, aes(x = sum_trips, y = combi)) +
  geom_col(width = 0.7, fill = "steelblue") +
  scale_x_continuous(labels = comma) +
  labs(title = "Origin-Destination Combinations with most trips",
       x = "Count", y = "Origin/Destination") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "right")

Finding top O-D pairs

# Filtering data frame to the top 10 OD combinations
top_od_pairs <- od_pairs %>%
  filter(combi %in% c("SAN/LAX", "OGG/HNL", "LGA/DCA", "LGA/BOS", "LAX/SAN",
                      "LAX/LAS", "LAS/LAX", "HNL/OGG", "DCA/LGA", "BOS/LGA"))

# One fill colour per level of `Year`
plot_cols <- c("#e48f1b", "steelblue", "#aeaeae")

# Stacked bar chart of top 10 ODs: one horizontal bar per pair, split by year
p1 <- ggplot(top_od_pairs, aes(x = num_trips, y = combi, fill = Year)) +
  geom_col(position = "stack", width = 0.7) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = plot_cols) +
  labs(title = "Top 10 Origin-Destination Combinations",
       x = "Number of trips", y = "Origin/Destination") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p1

Data preparation for ODs with significant changes

# Manipulating of dataset
# One table per year holding the trip count of every O-D pair, keyed by `combi`
unusual_pairs <- od_pairs %>%
  filter(Year == 2005) %>%
  mutate(num_2005 = num_trips)
unusual_pairs <- subset(unusual_pairs, select = c("combi", "num_2005"))

unusual_pairs1 <- od_pairs %>%
  filter(Year == 2006) %>%
  mutate(num_2006 = num_trips)
unusual_pairs1 <- subset(unusual_pairs1, select = c("combi", "num_2006"))

unusual_pairs2 <- od_pairs %>%
  filter(Year == 2007) %>%
  mutate(num_2007 = num_trips)
unusual_pairs2 <- subset(unusual_pairs2, select = c("combi", "num_2007"))

# Using inner join to combine all 3 datasets and retrieving top 10 values
# Only pairs present in all three years survive the joins. `max_diff` is the
# gap between the busiest and the quietest year of a pair.
odd_pairs <- unusual_pairs %>%
  inner_join(unusual_pairs1, by = "combi") %>%
  inner_join(unusual_pairs2, by = "combi")
odd_pairs$max <- pmax(odd_pairs$num_2005, odd_pairs$num_2006, odd_pairs$num_2007)
odd_pairs$min <- pmin(odd_pairs$num_2005, odd_pairs$num_2006, odd_pairs$num_2007)
odd_pairs$max_diff <- odd_pairs$max - odd_pairs$min
odd_pairs1 <- odd_pairs %>%
  arrange(desc(max_diff)) %>%
  head(n = 10)

# More data manipulation
# Only the three yearly counts are reshaped to long form, so the helper columns
# (`max`, `min`, `max_diff`) can never reach the plot; previously they were
# excluded only because the first 30 melted rows happened to be the yearly ones.
odd_pairs1 <- odd_pairs1 %>%
  select(combi, num_2005, num_2006, num_2007) %>%
  melt(id.vars = "combi") %>%
  mutate(Year = sub("^num_", "", as.character(variable)))

Plot of top 10 OD combinations with significant changes

# Stacked bar chart: one horizontal bar per O-D pair in `odd_pairs1`, split by
# year
p2 <- ggplot(odd_pairs1, aes(x = value, y = combi, fill = Year)) +
  geom_col(position = "stack", width = 0.7) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = plot_cols) +
  labs(title = "Origin-Destination Combinations with significant changes in traffic",
       x = "Number of trips", y = "Origin/Destination") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p2

Grid plot of both plots using the cowplot package

plot_grid(p1,p2)

Line chart of top 5 combinations

# Number of flights per calendar day for every O-D pair, busiest first
line_pairs <- years %>%
  group_by(Year, Month, DayofMonth, Origin, Dest) %>%
  summarize(num_trips = n()) %>%
  arrange(-num_trips)
## `summarise()` has grouped output by 'Year', 'Month', 'DayofMonth', 'Origin'.
## You can override using the `.groups` argument.
# `combi` is the route written as "Origin/Dest", stored as a factor
line_pairs$combi <- factor(paste(line_pairs$Origin, line_pairs$Dest, sep = "/"))
line_pairs$Year <- as.factor(line_pairs$Year)

# Daily trip counts for the five selected O-D pairs
top_5 <- line_pairs %>%
  filter(combi %in% c("SAN/LAX", "LAX/SAN", "LAX/LAS", "LAS/LAX", "BOS/LGA"))
# Build a calendar date from the year, month and day columns
top_5$date <- as.Date(paste(top_5$Year, top_5$Month, top_5$DayofMonth, sep = "-"))

# `month` is the first day of the month each date falls in
top_5 <- top_5 %>%
  group_by(month = floor_date(date, unit = "month"))

# Mean of the daily trip counts per pair and month
mean_line_pairs <- top_5 %>%
  group_by(combi, month) %>%
  summarise(mean_trips = mean(num_trips))
## `summarise()` has grouped output by 'combi'. You can override using the
## `.groups` argument.
# One line colour per O-D pair
plot_cols2 <- c("steelblue", "#e48f1b", "#aeaeae", "#f7d027", "steelblue2")
p3 <- ggplot(mean_line_pairs, aes(month, mean_trips, color = combi)) +
  geom_line(size = 0.5) +
  scale_color_manual(values = plot_cols2) +
  labs(title = "Traffic of Top 5 Origin-Destination Combinations over 3 years",
       x = "", y = "Mean number of trips") +
  theme_bw() +
  theme(legend.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p3

Line chart of top 5 combinations with the most changes

# Daily trip counts for the five pairs selected for this chart
change_5 <- line_pairs %>%
  filter(combi %in% c("OGG/HNL", "HNL/OGG", "KOA/HNL", "HNL/LIH", "HNL/KOA"))
change_5$date <- as.Date(paste(change_5$Year, change_5$Month, change_5$DayofMonth, sep = "-"))

# Add the first day of each date's month as `month` and group by it
change_5 <- change_5 %>%
  group_by(month = floor_date(date, unit = "month"))

# Mean daily number of trips per pair and month
mean_change_pairs <- change_5 %>%
  group_by(combi, month) %>%
  summarise(mean_trips = mean(num_trips))
## `summarise()` has grouped output by 'combi'. You can override using the
## `.groups` argument.
p4 <- mean_change_pairs %>%
  ggplot(aes(x = month, y = mean_trips, color = combi)) +
  geom_line(size = 0.5) +
  scale_color_manual(values = plot_cols2) +
  labs(
    title = "Traffic of Origin-Destination Combinations with significant increases over 3 years",
    x = "",
    y = "Mean number of trips"
  ) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )
p4

Grid plot of both plots using the cowplot package

# Combine p3 and p4 into a single figure (cowplot grid)
plot_grid(p3,p4)

# Removing data frames from Question 3
# The previous call misspelt two names (`unusal_pairs2`, `top5`), which raised
# "object not found" warnings and left `top_5` in memory. Names are now passed
# as strings and intersected with ls(), so an object that was never created is
# skipped silently instead of producing a warning.
rm(list = intersect(
  c("od_pairs", "sum_od", "top_od_pairs", "unusual_pairs", "unusual_pairs1",
    "unusual_pairs2", "odd_pairs", "odd_pairs1", "line_pairs", "top_5"),
  ls()
))

4. Can you detect cascading failures as delays in one airport create delays in others?

Preparing data frame

# Treat month and day of month as categorical variables
years[c("Month", "DayofMonth")] <- lapply(years[c("Month", "DayofMonth")], as.factor)

# Flight status flags: "Yes" when the recorded delay is strictly positive,
# "No" otherwise (including flights whose delay is missing)
years$dep_delayed <- as.factor(
  ifelse(!is.na(years$DepDelay) & years$DepDelay > 0, "Yes", "No")
)
years$arr_delayed <- as.factor(
  ifelse(!is.na(years$ArrDelay) & years$ArrDelay > 0, "Yes", "No")
)
summary(years$dep_delayed)
##       No      Yes 
## 13227403  8508330
summary(years$arr_delayed)
##       No      Yes 
## 12021690  9714043

Delay rate per month (From question 1)

# plot for delay rate per month (from question 1)
month_label <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

# Per month: number of flights delayed on departure or arrival, total number
# of flights, and the resulting share of delayed flights
month_delay <- years %>%
  select(Month, dep_delayed, arr_delayed) %>%
  group_by(Month) %>%
  summarise(num_delays = sum(dep_delayed == "Yes" | arr_delayed == "Yes"),
            num_flights = n(),
            delay_rate = num_delays / num_flights)

# delay_rate is a proportion of flights, not a duration, so the axis label no
# longer claims the unit is minutes
p1 <- month_delay %>%
  ggplot(aes(x = Month, y = delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  scale_x_discrete(labels = month_label) +
  labs(title = "Delay Rate per Month", y = "Delay Rate (proportion of flights)") +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        axis.title.x = element_blank())
p1

Stacked bar chart of the number of flights by month

# Number of flights for every month/year combination
bar_grouped <- years %>%
  group_by(Month, Year) %>%
  summarise(num_flights = n())
## `summarise()` has grouped output by 'Month'. You can override using the
## `.groups` argument.
bar_grouped$Year <- factor(bar_grouped$Year)

plot_cols <- c("#e48f1b", "steelblue", "#aeaeae")

# One bar per month, stacked by year
p2 <- ggplot(bar_grouped, aes(x = Month, y = num_flights, fill = Year)) +
  geom_col(position = "stack", width = 0.7) +
  scale_fill_manual(values = plot_cols) +
  scale_x_discrete(labels = month_label) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Number of flights", x = "", y = "Frequency") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )

# Grid plot using `cowplot` package
plot_grid(p1, p2, align = "h")

Third plot: Departure and Arrival rates in December (Month with highest delay rate)

# Share of flights delayed on arrival / departure for each day of December.
# The rate columns are selected and renamed by name; the previous version
# computed `num_flights` twice and picked columns by position, which silently
# breaks as soon as a summary column is added or reordered.
first_month <- years %>%
  select(Month, DayofMonth, dep_delayed, arr_delayed) %>%
  filter(Month == 12) %>%
  group_by(DayofMonth) %>%
  summarise(num_flights = n(),
            arr_rate = sum(arr_delayed == "Yes") / num_flights,
            dep_rate = sum(dep_delayed == "Yes") / num_flights) %>%
  select(DayofMonth,
         `Arrival Delay Rate` = arr_rate,
         `Departure Delay Rate` = dep_rate)

# Long format: one row per day and rate type, as needed for the line chart
first_month <- melt(first_month, id.vars = "DayofMonth")
str(first_month)
## 'data.frame':    62 obs. of  3 variables:
##  $ DayofMonth: Factor w/ 31 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ variable  : Factor w/ 2 levels "Arrival Delay Rate",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ value     : num  0.48 0.488 0.432 0.479 0.454 ...
# One line per rate type (arrival / departure) across the days of December
p3 <- first_month %>%
  ggplot(aes(x = DayofMonth, y = value, group = variable, color = variable)) +
  geom_line() +
  labs(title = "Delay Rate in December", x = "", y = "") +
  scale_color_manual(values = plot_cols) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )
p3

Preparing data for fourth plot: Delay rate on 22nd December (Day with highest delay)

# Flights on 22 and 23 December with a recorded departure time of at least
# three digits (hhmm >= 100)
# NOTE(review): departures between 00:01 and 00:59 (1-2 digit DepTime) are
# dropped here - confirm this is intended.
day_aug <- years %>%
  select(Month, DayofMonth, DepTime, CRSDepTime, DepDelay, dep_delayed, arr_delayed) %>%
  filter(Month == 12,
         DayofMonth %in% c("22", "23"),
         !is.na(DepTime),
         nchar(DepTime) > 2)

# 3-digit times: round to one significant digit and zero-pad to four
# characters ("0100" ... "0900", or "1000" when rounding up from 950+)
# NOTE(review): signif() rounds the hhmm number to the nearest hundred, which
# is not the same as rounding the clock time to the nearest hour.
day_aug1 <- day_aug %>%
  filter(nchar(DepTime) == 3) %>%
  mutate(DepTime = sprintf("%04.0f", signif(DepTime, 1)))


# 4-digit times: round to two significant digits; values that end up above
# 2400 are wrapped to the next day by subtracting 2400 and prefixing "0"
day_aug2 <- day_aug %>%
  filter(nchar(DepTime) == 4) %>%
  mutate(DepTime = signif(DepTime, 2),
         DepTime = ifelse(DepTime > 2400,
                          paste0("0", DepTime - 2400),
                          as.character(DepTime)))

# Recombine and turn the hour label into a factor
day_aug <- rbind(day_aug1, day_aug2)
day_aug$DepTime <- factor(day_aug$DepTime)
summary(day_aug$DepTime)
## 0100 0200 0300 0400 0500 0600 0700 0800 0900 1000 1100 1200 1300 1400 1500 1600 
##  245   98   31   18  543 6594 7144 7454 6971 6898 7034 7066 6739 6944 6794 6998 
## 1700 1800 1900 2000 2100 2200 2300 2400 
## 7159 6980 6445 5518 4567 2923 1544  223
# Share of flights delayed on arrival / departure per departure-hour bucket
# and day. Columns are selected and renamed by name; the previous version
# computed `num_flights` twice and picked columns by position.
day_delay <- day_aug %>%
  select(DayofMonth, DepTime, dep_delayed, arr_delayed) %>%
  group_by(DepTime, DayofMonth) %>%
  summarise(num_flights = n(),
            arr_rate = sum(arr_delayed == "Yes") / num_flights,
            dep_rate = sum(dep_delayed == "Yes") / num_flights) %>%
  ungroup() %>%
  select(DepTime,
         DayofMonth,
         `Arrival Delay Rate` = arr_rate,
         `Departure Delay Rate` = dep_rate)
## `summarise()` has grouped output by 'DepTime'. You can override using the
## `.groups` argument.

# Rebuild the factor so only hour labels that actually occur remain as levels
day_delay$DepTime <- factor(as.character(day_delay$DepTime))

# Long format: one row per hour bucket, day and rate type
day_delay <- melt(day_delay, id.vars = c("DepTime", "DayofMonth"))
str(day_delay)
## 'data.frame':    96 obs. of  4 variables:
##  $ DepTime   : Factor w/ 24 levels "0100","0200",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ DayofMonth: Factor w/ 31 levels "1","2","3","4",..: 22 23 22 23 22 23 22 23 22 23 ...
##  $ variable  : Factor w/ 2 levels "Arrival Delay Rate",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ value     : num  0.909 0.926 0.861 0.919 0.833 ...

Fourth plot

# Delay rates by departure-hour bucket for 22 December only
p4 <- day_delay %>%
  filter(DayofMonth == 22) %>%
  arrange(DepTime) %>%
  ggplot(aes(x = DepTime, y = value, group = variable, color = variable)) +
  geom_line() +
  labs(title = "Delay Rate on 22nd December", x = "", y = "") +
  scale_color_manual(values = plot_cols) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )
p4

Grid plot using cowplot package

# Stack p3 above p4 in a single-column figure
plot_grid(p3, p4, ncol=1)

Manually checking for cascading failures due to delays

# Flights on 22 December 2005 that were delayed on both departure and arrival
# and actually departed strictly between 11:00 and 13:00
check <- years %>%
  filter(Year == 2005,
         Month == 12,
         DayofMonth == 22,
         arr_delayed == "Yes",
         dep_delayed == "Yes",
         DepTime > 1100,
         DepTime < 1300) %>%
  select(Year, Month, DayofMonth, CRSDepTime, DepTime, CRSArrTime, ArrTime,
         TailNum, FlightNum, Origin, Dest, arr_delayed, dep_delayed)

# Found 2 examples of 1st degree cascading failures due to delays:
# keep only the flights of these two aircraft
check <- check %>%
  filter(TailNum %in% c("N957SW", "N835AE"))
summary(check)
##    Year       Month     DayofMonth   CRSDepTime      DepTime       CRSArrTime  
##  2005:4   12     :4   22     :4    Min.   :1010   Min.   :1105   Min.   :1116  
##  2006:0   1      :0   1      :0    1st Qu.:1014   1st Qu.:1105   1st Qu.:1119  
##  2007:0   2      :0   2      :0    Median :1080   Median :1172   Median :1268  
##           3      :0   3      :0    Mean   :1082   Mean   :1176   Mean   :1276  
##           4      :0   4      :0    3rd Qu.:1148   3rd Qu.:1242   3rd Qu.:1426  
##           5      :0   5      :0    Max.   :1155   Max.   :1253   Max.   :1454  
##           (Other):0   (Other):0                                                
##     ArrTime       TailNum            FlightNum       Origin         
##  Min.   :1213   Length:4           Min.   :4688   Length:4          
##  1st Qu.:1215   Class :character   1st Qu.:4688   Class :character  
##  Median :1332   Mode  :character   Median :5559   Mode  :character  
##  Mean   :1357                      Mean   :5612                     
##  3rd Qu.:1474                      3rd Qu.:6483                     
##  Max.   :1550                      Max.   :6642                     
##                                                                     
##      Dest           arr_delayed dep_delayed
##  Length:4           No :0       No :0      
##  Class :character   Yes:4       Yes:4      
##  Mode  :character                          
##                                            
##                                            
##                                            
## 
# Removing data frames from Question 4
# day_aug2 (the 4-digit departure-time subset) is now removed as well; it was
# previously left behind while its sibling day_aug1 was cleaned up.
rm(month_delay, bar_grouped, first_month, day_aug, day_aug1, day_aug2, day_delay, check)

5. Use the available variables to construct a model that predicts delay.

I will be building a model to predict DepDelay, or departure delay.

Preparing data for exploratory data analysis

# Count, per column, the cells that are either NA or an empty string
colSums(is.na(years) | years == "")
##              Year             Month        DayofMonth         DayOfWeek 
##                 0                 0                 0                 0 
##           DepTime        CRSDepTime           ArrTime        CRSArrTime 
##            416412                 0            463805                 0 
##     UniqueCarrier         FlightNum           TailNum ActualElapsedTime 
##                 0                 0                22            463805 
##    CRSElapsedTime           AirTime          ArrDelay          DepDelay 
##               998            463805            463805            416412 
##            Origin              Dest          Distance            TaxiIn 
##                 0                 0                 0                 0 
##           TaxiOut         Cancelled  CancellationCode          Diverted 
##                 0                 0          21319319                 0 
##      CarrierDelay      WeatherDelay          NASDelay     SecurityDelay 
##                 0                 0                 0                 0 
## LateAircraftDelay            status       dep_delayed       arr_delayed 
##                 0                 0                 0                 0
# Creating new feature for status of flights.
# Precedence (first match wins): Cancelled > Delayed > Diverted > On Time.
# Flights matching none of the conditions (e.g. missing DepDelay without being
# cancelled or diverted) keep a missing status.
years$status <- case_when(
  years$Cancelled == 1 ~ "Cancelled",
  years$DepDelay > 0 ~ "Delayed",
  years$Diverted == 1 ~ "Diverted",
  years$Diverted != 1 & years$DepDelay <= 0 & years$Cancelled != 1 ~ "On Time"
)
years$status <- as.factor(years$status)

Factors 1 and 2: CRSDepTime and CRSArrTime

# Scheduled departure times; per the author's inspection a value of 0 stands
# for 24:00 / 00:00
summary(years$CRSDepTime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0     930    1325    1334    1721    2359
# Scheduled arrival times; same convention for 0
summary(years$CRSArrTime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    1116    1520    1497    1909    2400
# Working table for this question. Rows without a departure delay are dropped
# (the author attributes these NAs to cancelled flights); a few extra columns
# are carried along for the machine learning section.
main <- years %>%
  filter(!is.na(DepDelay)) %>%
  select(TailNum, Year, CRSDepTime, CRSArrTime, DepDelay, CRSElapsedTime, status)

summary(main)
##    TailNum            Year           CRSDepTime     CRSArrTime  
##  Length:21319321    2005:7006866   Min.   :   0   Min.   :   0  
##  Class :character   2006:7019988   1st Qu.: 930   1st Qu.:1115  
##  Mode  :character   2007:7292467   Median :1325   Median :1520  
##                                    Mean   :1333   Mean   :1496  
##                                    3rd Qu.:1720   3rd Qu.:1908  
##                                    Max.   :2359   Max.   :2400  
##                                                                 
##     DepDelay        CRSElapsedTime         status        
##  Min.   :-1200.00   Min.   : -97.0   Cancelled:       0  
##  1st Qu.:   -4.00   1st Qu.:  76.0   Delayed  : 8508330  
##  Median :    0.00   Median : 109.0   Diverted :   21996  
##  Mean   :   10.07   Mean   : 127.1   On Time  :12788995  
##  3rd Qu.:    9.00   3rd Qu.: 157.0                       
##  Max.   : 2601.00   Max.   :1031.0                       
##                     NA's   :727
## Analysis:
## Single digit CRSDepTime represents the minutes in the hour 2400, eg CRSDepTime == 8 = 2408; same thing for DepTime. Timings over 2400 indicates next day -- hence take (Time - 2400)

Data cleaning for CRSDepTime and CRSArrTime

# Bucket the scheduled times into hour labels "0100" ... "2400".
# `main` is split by the number of digits of the hhmm value, each part is
# labelled, and the parts are stacked again. `arr` carries the bucketed
# CRSArrTime and `dep` the bucketed CRSDepTime; the other time column stays raw.
# NOTE(review): signif() rounds the hhmm number to the nearest hundred, which
# is not the same as rounding the clock time to the nearest hour.

# 3-digit timings: one significant digit, zero-padded ("0100" to "1000")
crs1 <- main %>%
  filter(nchar(CRSArrTime) == 3) %>%
  mutate(CRSArrTime = factor(sprintf("%04.0f", signif(CRSArrTime, 1))))

crs2 <- main %>%
  filter(nchar(CRSDepTime) == 3) %>%
  mutate(CRSDepTime = factor(sprintf("%04.0f", signif(CRSDepTime, 1))))

# 4-digit timings: two significant digits ("1000" to "2400"), sorted by time
crs3 <- main %>%
  filter(nchar(CRSDepTime) == 4) %>%
  arrange(CRSDepTime) %>%
  mutate(CRSDepTime = factor(signif(CRSDepTime, 2)))

crs4 <- main %>%
  filter(nchar(CRSArrTime) == 4) %>%
  arrange(CRSArrTime) %>%
  mutate(CRSArrTime = factor(signif(CRSArrTime, 2)))

# 1- and 2-digit timings (minutes past midnight): all labelled "2400"
crs5 <- main %>%
  filter(nchar(CRSDepTime) <= 2) %>%
  mutate(CRSDepTime = factor(rep("2400", n())))

crs6 <- main %>%
  filter(nchar(CRSArrTime) <= 2) %>%
  mutate(CRSArrTime = factor(rep("2400", n())))

# Binding of all dataframes together (3-digit, then 4-digit, then 1-2 digit rows)
arr <- rbind(crs1, crs4, crs6)
dep <- rbind(crs2, crs3, crs5)

rm(crs1, crs2, crs3, crs4, crs5, crs6)

Plot of CRSDepTime and CRSArrTime against mean departure delay rates

# Mean departure delay per scheduled-departure hour bucket
dep1 <- dep %>%
  group_by(CRSDepTime) %>%
  summarise(mean_delay = mean(DepDelay, na.rm = TRUE))

# Mean departure delay per scheduled-arrival hour bucket
arr1 <- arr %>%
  group_by(CRSArrTime) %>%
  summarise(mean_delay = mean(DepDelay, na.rm = TRUE)) %>%
  arrange(CRSArrTime)

# Layers shared by both panels: blue line with points, y axis fixed to 0-20,
# rotated x labels, bold centred title
mean_delay_layers <- list(
  geom_line(color = "steelblue", size = 0.7),
  geom_point(color = "steelblue"),
  ylim(0, 20),
  theme_classic(),
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)),
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        axis.title.x = element_blank())
)

p1 <- ggplot(dep1, aes(x = CRSDepTime, y = mean_delay, group = 1)) +
  mean_delay_layers +
  labs(title = "Scheduled Departure Time", x = "", y = "Mean Delay (minutes)")

p2 <- ggplot(arr1, aes(x = CRSArrTime, y = mean_delay, group = 1)) +
  mean_delay_layers +
  labs(title = "Scheduled Arrival Time", x = "", y = "Mean Delay (minutes)")

# Shared heading placed above the two panels
title <- ggdraw() +
  draw_label("Mean Departure Delay based on", fontface = "bold", size = 17)

p <- plot_grid(p1, p2)
plot_grid(title, p, ncol = 1, rel_heights = c(0.1, 1))

Factor 3: age of Planes

(See question 2 plot) I will be including age of planes as I have concluded that the delay rate of planes increases and peaks when a plane is at 25 years of service, and decreases thereafter.

Factor 4: CRSElapsedTime

# Work on the first 100,000 rows of `main` only, without missing elapsed times
crs_elapsed <- main[seq_len(100000), ] %>%
  drop_na(CRSElapsedTime)

# Scatterplot of scheduled elapsed time (y) against departure delay (x)
ggplot(crs_elapsed, aes(x = DepDelay, y = CRSElapsedTime)) +
  geom_point(alpha = 0.2, size = 0.3) +
  labs(
    title = "Scatter Plot of Scheduled Elapsed Time against Departure Delay (minutes)",
    x = "Departure Delay",
    y = "Scheduled Elapsed Time"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12)
  )

Conclusion: CRSDepTime, CRSArrTime, age (of plane), CRSElapsedTime are variables that will be included.

Plane manufacturer can’t be used because it contains too many NA’s; imputing such a large proportion of the dataset would make the model inaccurate.

Cleaning up variables and creating new dataframe original

# Manufacturing year of every aircraft, keyed by tail number
original_plane <- planes %>%
  select(tailnum, year)

# Kept so that any later code referring to the trimmed `dep` / `arr` still works
dep <- subset(dep, select = c("TailNum", "Year", "CRSDepTime", "DepDelay", "CRSElapsedTime"))
arr <- subset(arr, select = c("TailNum", "Year", "CRSArrTime", "DepDelay", "CRSElapsedTime"))

# Hour label ("0100" ... "2400") for an hhmm time, using the same rules as the
# separate dep/arr cleaning: 1-2 digits -> "2400", 3 digits -> one significant
# digit (zero-padded), 4 digits -> two significant digits.
hour_bucket <- function(hhmm) {
  digits <- nchar(hhmm)
  rounded <- ifelse(digits == 3, signif(hhmm, 1), signif(hhmm, 2))
  ifelse(digits <= 2, "2400", sprintf("%04.0f", rounded))
}

# Build the modelling table with exactly ONE row per flight.
# `dep` and `arr` each contain every row of `main` once. Joining them on
# TailNum, Year, DepDelay and CRSElapsedTime is a many-to-many match, because
# those columns do not identify a flight: it inflated ~21.3M flights to ~76.7M
# rows and paired the departure hour of one flight with the arrival hour of
# another. Bucketing both time columns directly on `main` avoids the join.
# Column order is kept (tailnum, Year, CRSDepTime, DepDelay, CRSElapsedTime,
# CRSArrTime, year). Printed results further down must be regenerated.
original <- main %>%
  transmute(tailnum = TailNum,
            Year,
            CRSDepTime = factor(hour_bucket(CRSDepTime)),
            DepDelay,
            CRSElapsedTime,
            CRSArrTime = factor(hour_bucket(CRSArrTime))) %>%
  inner_join(original_plane, by = "tailnum")
str(original)
## 'data.frame':    76736905 obs. of  7 variables:
##  $ tailnum       : chr  "N427UA" "N449UA" "N449UA" "N433UA" ...
##  $ Year          : Factor w/ 3 levels "2005","2006",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CRSDepTime    : Factor w/ 24 levels "0100","0200",..: 8 8 8 8 8 8 6 6 6 6 ...
##  $ DepDelay      : int  -4 0 0 -6 13 2 14 3 -4 -5 ...
##  $ CRSElapsedTime: int  197 197 197 197 197 197 112 112 112 112 ...
##  $ CRSArrTime    : Factor w/ 24 levels "0100","0200",..: 11 11 16 11 11 11 7 7 7 7 ...
##  $ year          : num  1995 1998 1998 1996 1995 ...
# Flight year (factor) and manufacturing year as plain numbers so that the
# aircraft age can be computed by subtraction
original <- original %>%
  mutate(Year = as.numeric(as.character(Year)),
         year = as.numeric(year))

summary(original) # year(plane) has '0' and several NA's
##    tailnum               Year        CRSDepTime          DepDelay       
##  Length:76736905    Min.   :2005   0800   : 6225911   Min.   :-1200.00  
##  Class :character   1st Qu.:2005   0700   : 5748525   1st Qu.:   -4.00  
##  Mode  :character   Median :2006   1000   : 5502320   Median :    0.00  
##                     Mean   :2006   0600   : 5491855   Mean   :    1.18  
##                     3rd Qu.:2007   0900   : 5209006   3rd Qu.:    0.00  
##                     Max.   :2007   1200   : 5096041   Max.   : 2601.00  
##                                    (Other):43463247                     
##  CRSElapsedTime      CRSArrTime            year     
##  Min.   : -97.00   1000   : 6101454   Min.   :   0  
##  1st Qu.:  60.00   0800   : 5837636   1st Qu.:1994  
##  Median :  75.00   1200   : 5645913   Median :1997  
##  Mean   :  86.89   0900   : 5303718   Mean   :1994  
##  3rd Qu.: 100.00   1400   : 5091362   3rd Qu.:2001  
##  Max.   :1031.00   1600   : 5084999   Max.   :2007  
##  NA's   :1         (Other):43671823
# Replace missing manufacturing years / elapsed times with the column median
original$year[is.na(original$year)] <- median(original$year, na.rm = TRUE)
original$CRSElapsedTime[is.na(original$CRSElapsedTime)] <- median(original$CRSElapsedTime, na.rm = TRUE)


# Drop placeholder manufacturing years (0), derive the aircraft age at flight
# time and keep strictly positive ages only, then encode character and factor
# columns as numeric codes.
# NOTE(review): `age > 0` also removes aircraft flown in their manufacturing
# year (age 0), not just negative ages - confirm this is intended.
original <- original %>%
  filter(year > 0) %>%
  mutate(age = Year - year) %>%
  filter(age > 0) %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(across(where(is.factor), as.numeric))
summary(original)
##     tailnum          Year        CRSDepTime       DepDelay        
##  Min.   :   1   Min.   :2005   Min.   : 1.00   Min.   :-1200.000  
##  1st Qu.:1618   1st Qu.:2005   1st Qu.: 9.00   1st Qu.:   -4.000  
##  Median :2357   Median :2006   Median :12.00   Median :    0.000  
##  Mean   :2229   Mean   :2006   Mean   :12.72   Mean   :    1.159  
##  3rd Qu.:2798   3rd Qu.:2007   3rd Qu.:17.00   3rd Qu.:    0.000  
##  Max.   :4171   Max.   :2007   Max.   :24.00   Max.   : 2601.000  
##  CRSElapsedTime      CRSArrTime         year           age        
##  Min.   : -97.00   Min.   : 1.00   Min.   :1956   Min.   : 1.000  
##  1st Qu.:  60.00   1st Qu.:10.00   1st Qu.:1994   1st Qu.: 5.000  
##  Median :  75.00   Median :14.00   Median :1997   Median : 8.000  
##  Mean   :  86.62   Mean   :14.07   Mean   :1996   Mean   : 9.768  
##  3rd Qu.: 100.00   3rd Qu.:18.00   3rd Qu.:2001   3rd Qu.:13.000  
##  Max.   :1031.00   Max.   :24.00   Max.   :2006   Max.   :51.000
# Count, per column, the cells that are either NA or an empty string
colSums(is.na(original) | original == "")
##        tailnum           Year     CRSDepTime       DepDelay CRSElapsedTime 
##              0              0              0              0              0 
##     CRSArrTime           year            age 
##              0              0              0
# Inspect the column types of `original` before subsetting to the model columns
str(original) 
## 'data.frame':    76010596 obs. of  8 variables:
##  $ tailnum       : num  1496 1606 1606 1524 1490 ...
##  $ Year          : num  2005 2005 2005 2005 2005 ...
##  $ CRSDepTime    : num  8 8 8 8 8 8 6 6 6 6 ...
##  $ DepDelay      : int  -4 0 0 -6 13 2 14 3 -4 -5 ...
##  $ CRSElapsedTime: num  197 197 197 197 197 197 112 112 112 112 ...
##  $ CRSArrTime    : num  11 11 16 11 11 11 7 7 7 7 ...
##  $ year          : num  1995 1998 1998 1996 1995 ...
##  $ age           : num  10 7 7 9 10 7 14 13 14 17 ...
# Subsetting to columns needed.
# Columns are picked by name (previously by position 3:6 and 8), so the
# selection no longer depends on the column order produced upstream.
original <- original[c("CRSDepTime", "DepDelay", "CRSElapsedTime", "CRSArrTime", "age")]
str(original)
## 'data.frame':    76010596 obs. of  5 variables:
##  $ CRSDepTime    : num  8 8 8 8 8 8 6 6 6 6 ...
##  $ DepDelay      : int  -4 0 0 -6 13 2 14 3 -4 -5 ...
##  $ CRSElapsedTime: num  197 197 197 197 197 197 112 112 112 112 ...
##  $ CRSArrTime    : num  11 11 16 11 11 11 7 7 7 7 ...
##  $ age           : num  10 7 7 9 10 7 14 13 14 17 ...

Training and evaluating

# Regression task: predict DepDelay from the remaining columns of `original`
task_original <- TaskRegr$new(id = "Delay", backend = original, target = "DepDelay")
print(task_original)
## <TaskRegr:Delay> (76010596 x 5)
## * Target: DepDelay
## * Properties: -
## * Features (4):
##   - dbl (4): CRSArrTime, CRSDepTime, CRSElapsedTime, age
task_original$feature_names
## [1] "CRSArrTime"     "CRSDepTime"     "CRSElapsedTime" "age"
task_original$target_names
## [1] "DepDelay"
# Restrict the task to its first 100,000 rows
# NOTE(review): these are the first rows in storage order, not a random
# sample - confirm that this subset is representative.
task_original$filter(rows = seq_len(100000))
# Keep every feature except the target; the feature list printed above does
# not contain DepDelay, so this leaves the feature set unchanged
task_original$select(setdiff(task_original$feature_names, "DepDelay"))

# Performance measure used throughout: mean squared error
measure <- msr("regr.mse")

# Baseline: linear regression preceded by mean imputation
learner_lm <- lrn("regr.lm")
gr_lm <- po("imputemean") %>>%
  po(learner_lm)
glrn_lm <- GraphLearner$new(gr_lm)

# 70/30 train/test split.
# Row ids are drawn from the task's own `row_ids` rather than from 1:nrow, so
# the split stays valid even if the task was filtered to rows other than the
# first ones. With row ids 1..nrow the sampled sets are identical to before.
set.seed(1)
train_set <- sample(task_original$row_ids, 0.7 * task_original$nrow)
test_set <- setdiff(task_original$row_ids, train_set)
glrn_lm$train(task_original, row_ids = train_set)
# Score with the measure defined above instead of relying on the default
glrn_lm$predict(task_original, row_ids = test_set)$score(measure)
## regr.mse 
## 78.64405
### regr.mse = 70.12866

Ridge Regression: Tuning Hyperparameters

# Ridge regression with a fixed penalty: glmnet with alpha = 0 (pure ridge
# penalty) and a single small lambda
learner_ridge <- lrn("regr.glmnet")
learner_ridge$param_set$values <- list(alpha = 0, lambda = 0.001)
# Pipeline: standardise the features, impute missing values by the mean, fit
gr_ridge <- po("scale") %>>%
  po("imputemean") %>>%
  po(learner_ridge)
glrn_ridge <- GraphLearner$new(gr_ridge)
# Same train/test split as the linear model
glrn_ridge$train(task_original, row_ids = train_set)
# score() without arguments uses the default measure (regr.mse in the output below)
glrn_ridge$predict(task_original, row_ids = test_set)$score()
## regr.mse 
## 78.64404
### regr.mse = 70.12547

# Ridge regression (alpha = 0) whose penalty lambda is tuned instead of fixed
learner_ridge2 <- lrn("regr.glmnet")
learner_ridge2$param_set$values <- list(alpha = 0)
gr_ridge2 <- po("scale") %>>%
  po("imputemean") %>>%
  po(learner_ridge2)
glrn_ridge2 <- GraphLearner$new(gr_ridge2)

# Search space for lambda. Built with ps() / p_dbl(), the paradox constructors
# that replace ParamSet$new(list(ParamDbl$new(...))), which newer paradox
# releases no longer provide. The parameter id carries the "regr.glmnet."
# prefix because the learner is wrapped in a graph.
tune_lambda <- ps(
  regr.glmnet.lambda = p_dbl(lower = 0.03, upper = 2)
)
tuner <- tnr("grid_search")
terminator <- trm("evals", n_evals = 20)

# Tune lambda by grid search with 3-fold cross-validation on the training
# rows, minimising the MSE, for at most 20 evaluations
at_ridge <- AutoTuner$new(
  learner = glrn_ridge2,
  resampling = rsmp("cv", folds = 3),
  measure = measure,
  search_space = tune_lambda,
  terminator = terminator,
  tuner = tuner
)

at_ridge$train(task_original, row_ids = train_set)
## INFO  [04:04:55.837] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [04:04:55.938] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:04:56.069] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:04:56.232] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:05:12.954] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:05:26.835] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:05:40.975] [mlr3] Finished benchmark 
## INFO  [04:05:41.182] [bbotk] Result of batch 1: 
## INFO  [04:05:41.206] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:05:41.206] [bbotk]                0.03 101.3591        0      0           44.568 
## INFO  [04:05:41.206] [bbotk]                                 uhash 
## INFO  [04:05:41.206] [bbotk]  1bb99e62-a930-437e-9cb9-1683654c1b8a 
## INFO  [04:05:41.214] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:05:41.399] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:05:41.431] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:05:54.924] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:06:08.061] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:06:19.847] [mlr3] Finished benchmark 
## INFO  [04:06:19.903] [bbotk] Result of batch 2: 
## INFO  [04:06:19.905] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:06:19.905] [bbotk]            1.124444  101.368        0      0           38.343 
## INFO  [04:06:19.905] [bbotk]                                 uhash 
## INFO  [04:06:19.905] [bbotk]  e3badafa-2a97-4b7b-b141-75534e353e67 
## INFO  [04:06:19.907] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:06:19.975] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:06:19.985] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:06:35.305] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:06:48.452] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:07:04.378] [mlr3] Finished benchmark 
## INFO  [04:07:04.455] [bbotk] Result of batch 3: 
## INFO  [04:07:04.459] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:07:04.459] [bbotk]                   2 101.3841        0      0           44.323 
## INFO  [04:07:04.459] [bbotk]                                 uhash 
## INFO  [04:07:04.459] [bbotk]  fb80b5f5-470b-4c17-9cb9-f6936a4f77e8 
## INFO  [04:07:04.461] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:07:04.562] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:07:04.578] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:07:17.643] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:07:31.230] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:07:44.274] [mlr3] Finished benchmark 
## INFO  [04:07:44.433] [bbotk] Result of batch 4: 
## INFO  [04:07:44.438] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:07:44.438] [bbotk]           0.9055556 101.3651        0      0           39.624 
## INFO  [04:07:44.438] [bbotk]                                 uhash 
## INFO  [04:07:44.438] [bbotk]  0fa39165-eab7-4739-95da-09a421dde0dc 
## INFO  [04:07:44.443] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:07:44.517] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:07:44.528] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:07:56.502] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:08:15.630] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:08:31.694] [mlr3] Finished benchmark 
## INFO  [04:08:31.868] [bbotk] Result of batch 5: 
## INFO  [04:08:31.873] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:08:31.873] [bbotk]            1.781111 101.3795        0      0           47.062 
## INFO  [04:08:31.873] [bbotk]                                 uhash 
## INFO  [04:08:31.873] [bbotk]  e9fbbeae-17c3-4a36-96b5-cc353e752237 
## INFO  [04:08:31.880] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:08:32.021] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:08:32.040] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:08:47.770] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:09:02.833] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:09:16.761] [mlr3] Finished benchmark 
## INFO  [04:09:16.884] [bbotk] Result of batch 6: 
## INFO  [04:09:16.888] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:09:16.888] [bbotk]           0.4677778 101.3607        0      0           44.606 
## INFO  [04:09:16.888] [bbotk]                                 uhash 
## INFO  [04:09:16.888] [bbotk]  7dd216bc-59e2-465f-9ac4-b26bd817dba1 
## INFO  [04:09:16.892] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:09:17.002] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:09:17.021] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:09:32.574] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:09:45.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:10:05.538] [mlr3] Finished benchmark 
## INFO  [04:10:05.708] [bbotk] Result of batch 7: 
## INFO  [04:10:05.713] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:10:05.713] [bbotk]            1.343333 101.3715        0      0           48.366 
## INFO  [04:10:05.713] [bbotk]                                 uhash 
## INFO  [04:10:05.713] [bbotk]  08a19f0b-4ebc-44cc-8448-9c30988dcabb 
## INFO  [04:10:05.717] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:10:05.822] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:10:05.840] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:10:21.067] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:10:35.523] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:10:49.160] [mlr3] Finished benchmark 
## INFO  [04:10:49.243] [bbotk] Result of batch 8: 
## INFO  [04:10:49.246] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:10:49.246] [bbotk]           0.2488889 101.3595        0      0           43.264 
## INFO  [04:10:49.246] [bbotk]                                 uhash 
## INFO  [04:10:49.246] [bbotk]  44902c76-c509-472c-be50-cc5581639d3b 
## INFO  [04:10:49.248] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:10:49.312] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:10:49.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:11:03.151] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:11:18.961] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:11:32.642] [mlr3] Finished benchmark 
## INFO  [04:11:32.771] [bbotk] Result of batch 9: 
## INFO  [04:11:32.775] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:11:32.775] [bbotk]            1.562222 101.3753        0      0           43.207 
## INFO  [04:11:32.775] [bbotk]                                 uhash 
## INFO  [04:11:32.775] [bbotk]  47d1ecd9-10d2-47e1-89e2-85ed4cfcb4f2 
## INFO  [04:11:32.778] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:11:32.878] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:11:32.895] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:11:49.071] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:12:03.173] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:12:28.475] [mlr3] Finished benchmark 
## INFO  [04:12:28.537] [bbotk] Result of batch 10: 
## INFO  [04:12:28.539] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:12:28.539] [bbotk]           0.6866667 101.3626        0      0           55.502 
## INFO  [04:12:28.539] [bbotk]                                 uhash 
## INFO  [04:12:28.539] [bbotk]  45892faf-acb2-4e5e-b831-f0c892d5d99d 
## INFO  [04:12:28.547] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [04:12:28.548] [bbotk] Result: 
## INFO  [04:12:28.550] [bbotk]  regr.glmnet.lambda learner_param_vals  x_domain regr.mse 
## INFO  [04:12:28.550] [bbotk]                0.03          <list[3]> <list[1]> 101.3591
# Predict with the tuned ridge AutoTuner on the rows listed in `test_set` and
# score those predictions. `$score()` is called without arguments; the printed
# result below is labelled `regr.mse`.
at_ridge$predict(task_original, row_ids = test_set)$score()
## regr.mse 
## 78.64359
### Noted regr.mse = 70.13002 (differs from the 78.64359 printed by this run; verify which value is current)

Random Forests

# Random forest pipeline: scale the features, mean-impute missing values, then
# fit a ranger regression forest.
learner_rf <- lrn("regr.ranger")
# NOTE(review): assigning the whole `values` list replaces every value already
# stored in the learner's parameter set, not only `min.node.size` -- confirm
# that this is intended before switching to a single-element assignment.
learner_rf$param_set$values <- list(min.node.size = 4)
gr_rf <- po("scale") %>>%
  po("imputemean") %>>%
  po(learner_rf)
glrn_rf <- GraphLearner$new(gr_rf)

# Search space: the number of trees, an integer in [50, 600]. The parameter id
# is prefixed with `regr.ranger.`, matching the name reported in the tuning log.
# Built with `ps()` / `p_int()` rather than the legacy
# `ParamSet$new(list(ParamInt$new(...)))` constructor.
tune_ntrees <- ps(
  regr.ranger.num.trees = p_int(lower = 50, upper = 600)
)

# Wrap the pipeline in an AutoTuner that evaluates each candidate with 3-fold
# cross-validation. `measure`, `terminator` and `tuner` are not defined in this
# chunk; they are taken from the surrounding script.
at_rf <- AutoTuner$new(
  learner = glrn_rf,
  resampling = rsmp("cv", folds = 3),
  measure = measure,
  search_space = tune_ntrees,
  terminator = terminator,
  tuner = tuner
)

# Tune and fit using only the rows listed in `train_set`.
at_rf$train(task_original, row_ids = train_set)
## INFO  [04:12:48.867] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [04:12:48.891] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:12:49.015] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:12:49.045] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:13:10.142] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:13:30.389] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:13:52.395] [mlr3] Finished benchmark 
## INFO  [04:13:52.496] [bbotk] Result of batch 1: 
## INFO  [04:13:52.503] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:13:52.503] [bbotk]                    172 106.9784        0      0           63.184 
## INFO  [04:13:52.503] [bbotk]                                 uhash 
## INFO  [04:13:52.503] [bbotk]  8c6048fb-6bd9-46e8-89ce-022feed544ff 
## INFO  [04:13:52.506] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:13:52.605] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:13:52.625] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:14:22.159] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:14:51.381] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:15:23.481] [mlr3] Finished benchmark 
## INFO  [04:15:23.893] [bbotk] Result of batch 2: 
## INFO  [04:15:23.916] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:15:23.916] [bbotk]                    600 106.7579        0      0             90.4 
## INFO  [04:15:23.916] [bbotk]                                 uhash 
## INFO  [04:15:23.916] [bbotk]  bfe83a56-1831-488a-9395-6565e76c5478 
## INFO  [04:15:23.930] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:15:24.228] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:15:24.299] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:15:51.014] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:16:17.163] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:16:41.515] [mlr3] Finished benchmark 
## INFO  [04:16:41.874] [bbotk] Result of batch 3: 
## INFO  [04:16:41.890] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:16:41.890] [bbotk]                    356 107.1674        0      0           76.932 
## INFO  [04:16:41.890] [bbotk]                                 uhash 
## INFO  [04:16:41.890] [bbotk]  12586678-1444-4d0a-88b8-74ed5b084863 
## INFO  [04:16:41.897] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:16:42.197] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:16:42.237] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:17:09.079] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:17:34.603] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:18:00.931] [mlr3] Finished benchmark 
## INFO  [04:18:01.153] [bbotk] Result of batch 4: 
## INFO  [04:18:01.163] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:18:01.163] [bbotk]                    478 107.0987        0      0           78.531 
## INFO  [04:18:01.163] [bbotk]                                 uhash 
## INFO  [04:18:01.163] [bbotk]  7fa40647-5990-434d-bac4-d568070b3847 
## INFO  [04:18:01.177] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:18:01.372] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:18:01.407] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:18:22.273] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:18:42.466] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:19:03.414] [mlr3] Finished benchmark 
## INFO  [04:19:03.620] [bbotk] Result of batch 5: 
## INFO  [04:19:03.628] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:19:03.628] [bbotk]                    233 106.9801        0      0           61.872 
## INFO  [04:19:03.628] [bbotk]                                 uhash 
## INFO  [04:19:03.628] [bbotk]  910106cf-c31d-4b96-93e8-4aa55dced883 
## INFO  [04:19:03.632] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:19:03.801] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:19:03.821] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:19:32.984] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:19:58.931] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:20:27.035] [mlr3] Finished benchmark 
## INFO  [04:20:27.270] [bbotk] Result of batch 6: 
## INFO  [04:20:27.275] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:20:27.275] [bbotk]                    539 107.0493        0      0           83.052 
## INFO  [04:20:27.275] [bbotk]                                 uhash 
## INFO  [04:20:27.275] [bbotk]  cdd2dab4-7764-4d52-9274-c4025d30d9c2 
## INFO  [04:20:27.279] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:20:27.413] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:20:27.431] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:20:48.651] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:21:10.526] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:21:32.226] [mlr3] Finished benchmark 
## INFO  [04:21:32.413] [bbotk] Result of batch 7: 
## INFO  [04:21:32.418] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:21:32.418] [bbotk]                    294 107.1791        0      0           64.646 
## INFO  [04:21:32.418] [bbotk]                                 uhash 
## INFO  [04:21:32.418] [bbotk]  4221803d-d8ca-42d1-8490-c831d61202e0 
## INFO  [04:21:32.423] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:21:32.552] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:21:32.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:21:57.104] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:22:21.523] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:22:45.686] [mlr3] Finished benchmark 
## INFO  [04:22:45.895] [bbotk] Result of batch 8: 
## INFO  [04:22:45.903] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:22:45.903] [bbotk]                    417 107.1271        0      0           72.987 
## INFO  [04:22:45.903] [bbotk]                                 uhash 
## INFO  [04:22:45.903] [bbotk]  57b7eb28-5980-4847-a2a7-44af08aca112 
## INFO  [04:22:45.907] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:22:46.066] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:22:46.094] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:23:03.924] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:23:20.903] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:23:37.687] [mlr3] Finished benchmark 
## INFO  [04:23:37.863] [bbotk] Result of batch 9: 
## INFO  [04:23:37.869] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:23:37.869] [bbotk]                    111 107.1279        0      0           51.471 
## INFO  [04:23:37.869] [bbotk]                                 uhash 
## INFO  [04:23:37.869] [bbotk]  b471d40b-1673-41b6-ab52-fbd972c8f5a6 
## INFO  [04:23:37.874] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:23:38.023] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:23:38.040] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:23:54.461] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:24:10.505] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:24:27.269] [mlr3] Finished benchmark 
## INFO  [04:24:27.452] [bbotk] Result of batch 10: 
## INFO  [04:24:27.458] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:24:27.458] [bbotk]                     50 107.7743        0      0           49.118 
## INFO  [04:24:27.458] [bbotk]                                 uhash 
## INFO  [04:24:27.458] [bbotk]  b9b7f423-d2ae-42b8-a9af-b66c2adfb2de 
## INFO  [04:24:27.543] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [04:24:27.548] [bbotk] Result: 
## INFO  [04:24:27.553] [bbotk]  regr.ranger.num.trees learner_param_vals  x_domain regr.mse 
## INFO  [04:24:27.553] [bbotk]                    600          <list[3]> <list[1]> 106.7579
# Predict with the tuned random forest AutoTuner on the rows listed in
# `test_set` and score those predictions. `$score()` is called without
# arguments; the printed result below is labelled `regr.mse`.
at_rf$predict(task_original, row_ids = test_set)$score()
## regr.mse 
## 83.88102
### Noted regr.mse = 35.71414 (differs from the 83.88102 printed by this run; verify which value is current)

Benchmarking

# Fix the RNG seed before the benchmark design is built and run.
set.seed(123)

# Learners to compare. All four objects are created earlier in the script;
# `at_ridge` and `at_rf` are AutoTuners, so each of their outer folds runs its
# own inner tuning loop (visible in the log output that follows).
lrn_list <- list(
  glrn_lm,
  glrn_ridge,
  at_ridge,
  at_rf
)

# Build the benchmark design (one task x four learners x 3-fold CV) and run it.
# Arguments are named in full (`tasks`, not `task`) so the call does not rely
# on partial argument matching.
bm_design <- benchmark_grid(
  tasks = task_original,
  learners = lrn_list,
  resamplings = rsmp("cv", folds = 3)
)
# `store_models = TRUE` keeps the fitted models in the benchmark result.
bmr <- benchmark(bm_design, store_models = TRUE)
## INFO  [04:25:06.391] [mlr3] Running benchmark with 12 resampling iterations 
## INFO  [04:25:06.411] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 1/3) 
## INFO  [04:25:12.588] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:25:27.166] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 2/3) 
## INFO  [04:25:27.895] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [04:25:27.916] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:25:28.129] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:25:28.165] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:25:45.221] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:26:00.662] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:26:16.352] [mlr3] Finished benchmark 
## INFO  [04:26:16.553] [bbotk] Result of batch 1: 
## INFO  [04:26:16.560] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:26:16.560] [bbotk]                     50 101.7081        0      0            48.02 
## INFO  [04:26:16.560] [bbotk]                                 uhash 
## INFO  [04:26:16.560] [bbotk]  db5e87ed-c85d-4606-b16e-6fe6d65878df 
## INFO  [04:26:16.566] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:26:16.786] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:26:16.812] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:26:37.511] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:26:56.552] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:27:17.484] [mlr3] Finished benchmark 
## INFO  [04:27:17.660] [bbotk] Result of batch 2: 
## INFO  [04:27:17.673] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:27:17.673] [bbotk]                    294 101.2209        0      0           60.577 
## INFO  [04:27:17.673] [bbotk]                                 uhash 
## INFO  [04:27:17.673] [bbotk]  c96c2a83-726a-465d-a076-49079309b3ad 
## INFO  [04:27:17.676] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:27:17.834] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:27:17.880] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:27:34.514] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:27:51.515] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:28:08.070] [mlr3] Finished benchmark 
## INFO  [04:28:08.133] [bbotk] Result of batch 3: 
## INFO  [04:28:08.136] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:28:08.136] [bbotk]                    111 101.6412        0      0           50.096 
## INFO  [04:28:08.136] [bbotk]                                 uhash 
## INFO  [04:28:08.136] [bbotk]  e18ecc07-6b72-4aa6-831b-7d89bcf1f716 
## INFO  [04:28:08.138] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:28:08.189] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:28:08.205] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:28:32.206] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:28:58.013] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:29:21.826] [mlr3] Finished benchmark 
## INFO  [04:29:22.010] [bbotk] Result of batch 4: 
## INFO  [04:29:22.016] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:29:22.016] [bbotk]                    539 100.9735        0      0           73.525 
## INFO  [04:29:22.016] [bbotk]                                 uhash 
## INFO  [04:29:22.016] [bbotk]  05dbbba1-c650-412d-983e-7a4ea212a375 
## INFO  [04:29:22.022] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:29:22.286] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:29:22.344] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:29:44.256] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:30:05.663] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:30:26.298] [mlr3] Finished benchmark 
## INFO  [04:30:26.362] [bbotk] Result of batch 5: 
## INFO  [04:30:26.364] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:30:26.364] [bbotk]                    356 101.1191        0      0           63.891 
## INFO  [04:30:26.364] [bbotk]                                 uhash 
## INFO  [04:30:26.364] [bbotk]  18f4a04a-714d-4a23-9389-8b1955f225b3 
## INFO  [04:30:26.366] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:30:26.409] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:30:26.418] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:30:49.024] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:31:12.113] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:31:42.188] [mlr3] Finished benchmark 
## INFO  [04:31:42.276] [bbotk] Result of batch 6: 
## INFO  [04:31:42.279] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:31:42.279] [bbotk]                    478 101.0684        0      0           75.697 
## INFO  [04:31:42.279] [bbotk]                                 uhash 
## INFO  [04:31:42.279] [bbotk]  b5c8e22c-f7b5-454a-ae9f-5d16fb038e80 
## INFO  [04:31:42.281] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:31:42.335] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:31:42.347] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:32:13.135] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:32:41.323] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:33:09.826] [mlr3] Finished benchmark 
## INFO  [04:33:10.269] [bbotk] Result of batch 7: 
## INFO  [04:33:10.307] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:33:10.307] [bbotk]                    600 100.9992        0      0           87.171 
## INFO  [04:33:10.307] [bbotk]                                 uhash 
## INFO  [04:33:10.307] [bbotk]  2737bee5-bf83-4074-9f46-0776f3a817f1 
## INFO  [04:33:10.315] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:33:10.591] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:33:10.647] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:33:31.269] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:33:52.609] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:34:12.103] [mlr3] Finished benchmark 
## INFO  [04:34:12.515] [bbotk] Result of batch 8: 
## INFO  [04:34:12.533] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:34:12.533] [bbotk]                    233 101.1252        0      0           61.248 
## INFO  [04:34:12.533] [bbotk]                                 uhash 
## INFO  [04:34:12.533] [bbotk]  fe38a62b-7532-448e-b40f-ffc50393a03e 
## INFO  [04:34:12.541] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:34:12.815] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:34:12.849] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:34:41.311] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:35:07.207] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:35:31.000] [mlr3] Finished benchmark 
## INFO  [04:35:31.302] [bbotk] Result of batch 9: 
## INFO  [04:35:31.314] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:35:31.314] [bbotk]                    172 101.2901        0      0           77.886 
## INFO  [04:35:31.314] [bbotk]                                 uhash 
## INFO  [04:35:31.314] [bbotk]  b69669d7-474d-4a46-a2cb-b8729293b445 
## INFO  [04:35:31.321] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:35:31.554] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:35:31.583] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:36:05.160] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:36:36.812] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:37:09.519] [mlr3] Finished benchmark 
## INFO  [04:37:09.810] [bbotk] Result of batch 10: 
## INFO  [04:37:09.820] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:37:09.820] [bbotk]                    417 101.2634        0      0           97.611 
## INFO  [04:37:09.820] [bbotk]                                 uhash 
## INFO  [04:37:09.820] [bbotk]  ae570db2-a02a-4913-8419-9f8242ad0c3b 
## INFO  [04:37:09.875] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [04:37:09.877] [bbotk] Result: 
## INFO  [04:37:09.883] [bbotk]  regr.ranger.num.trees learner_param_vals  x_domain regr.mse 
## INFO  [04:37:09.883] [bbotk]                    539          <list[3]> <list[1]> 100.9735 
## INFO  [04:38:01.966] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 3/3) 
## INFO  [04:38:11.582] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 2/3) 
## INFO  [04:38:19.873] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:38:40.024] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:38:57.149] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 3/3) 
## INFO  [04:38:58.322] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [04:38:58.345] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:38:58.510] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:38:58.552] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:39:38.406] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:40:13.366] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:40:43.885] [mlr3] Finished benchmark 
## INFO  [04:40:44.240] [bbotk] Result of batch 1: 
## INFO  [04:40:44.261] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:40:44.261] [bbotk]                    600  93.1543        0      0          104.869 
## INFO  [04:40:44.261] [bbotk]                                 uhash 
## INFO  [04:40:44.261] [bbotk]  4e6f5d13-7e37-4e68-96cf-da7795394ccd 
## INFO  [04:40:44.268] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:40:44.415] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:40:44.442] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:41:08.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:41:43.022] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:42:19.044] [mlr3] Finished benchmark 
## INFO  [04:42:19.508] [bbotk] Result of batch 2: 
## INFO  [04:42:19.524] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:42:19.524] [bbotk]                    233 93.49332        0      0           93.993 
## INFO  [04:42:19.524] [bbotk]                                 uhash 
## INFO  [04:42:19.524] [bbotk]  a2bb6f7f-2dfc-4c7c-b5c7-8ee09b415246 
## INFO  [04:42:19.534] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:42:19.815] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:42:19.888] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:42:52.893] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:43:33.623] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:44:19.392] [mlr3] Finished benchmark 
## INFO  [04:44:20.826] [bbotk] Result of batch 3: 
## INFO  [04:44:20.853] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:44:20.853] [bbotk]                    356 93.22086        0      0          118.789 
## INFO  [04:44:20.853] [bbotk]                                 uhash 
## INFO  [04:44:20.853] [bbotk]  359e4b6f-b74a-40e8-b2b8-c77f4b29f3d3 
## INFO  [04:44:20.866] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:44:21.198] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:44:21.264] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:45:14.562] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:47:23.522] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:48:01.467] [mlr3] Finished benchmark 
## INFO  [04:48:02.519] [bbotk] Result of batch 4: 
## INFO  [04:48:02.543] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:48:02.543] [bbotk]                    294 93.17571        0      0          219.125 
## INFO  [04:48:02.543] [bbotk]                                 uhash 
## INFO  [04:48:02.543] [bbotk]  6839a2d9-6d1b-4b73-84f5-a48df088f98e 
## INFO  [04:48:02.552] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:48:02.777] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:48:02.822] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:48:23.131] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:48:42.226] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:48:57.805] [mlr3] Finished benchmark 
## INFO  [04:48:58.047] [bbotk] Result of batch 5: 
## INFO  [04:48:58.052] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:48:58.052] [bbotk]                     50 93.87929        0      0           54.801 
## INFO  [04:48:58.052] [bbotk]                                 uhash 
## INFO  [04:48:58.052] [bbotk]  08c18333-fa85-4ebd-ba49-d4c01291bad2 
## INFO  [04:48:58.055] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:48:58.123] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:48:58.137] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:49:24.509] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:49:55.676] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:50:38.863] [mlr3] Finished benchmark 
## INFO  [04:50:39.202] [bbotk] Result of batch 6: 
## INFO  [04:50:39.224] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:50:39.224] [bbotk]                    417 93.39374        0      0          100.426 
## INFO  [04:50:39.224] [bbotk]                                 uhash 
## INFO  [04:50:39.224] [bbotk]  b8e3cff2-12b4-4c47-a429-729c391c17ed 
## INFO  [04:50:39.234] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:50:39.566] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:50:39.604] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:51:03.838] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:51:27.571] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:51:48.030] [mlr3] Finished benchmark 
## INFO  [04:51:48.276] [bbotk] Result of batch 7: 
## INFO  [04:51:48.293] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:51:48.293] [bbotk]                    172 93.09661        0      0           68.128 
## INFO  [04:51:48.293] [bbotk]                                 uhash 
## INFO  [04:51:48.293] [bbotk]  4ea6fe48-87d4-4523-9d6f-4b15c9bed0f6 
## INFO  [04:51:48.301] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:51:48.512] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:51:48.545] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:52:15.536] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:52:32.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:52:54.544] [mlr3] Finished benchmark 
## INFO  [04:52:54.893] [bbotk] Result of batch 8: 
## INFO  [04:52:54.914] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:52:54.914] [bbotk]                    111 93.51544        0      0           65.826 
## INFO  [04:52:54.914] [bbotk]                                 uhash 
## INFO  [04:52:54.914] [bbotk]  8a4d09be-00b2-4aa3-9918-1600289f26a5 
## INFO  [04:52:54.921] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:52:55.154] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:52:55.214] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:53:24.946] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:54:01.465] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:54:35.387] [mlr3] Finished benchmark 
## INFO  [04:54:35.669] [bbotk] Result of batch 9: 
## INFO  [04:54:35.679] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:54:35.679] [bbotk]                    478 93.33387        0      0           99.846 
## INFO  [04:54:35.679] [bbotk]                                 uhash 
## INFO  [04:54:35.679] [bbotk]  14c3442b-9319-4cc9-b19b-62f4dbd527ab 
## INFO  [04:54:35.683] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:54:35.785] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:54:35.799] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [04:55:10.709] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [04:55:43.360] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [04:56:18.532] [mlr3] Finished benchmark 
## INFO  [04:56:19.111] [bbotk] Result of batch 10: 
## INFO  [04:56:19.125] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [04:56:19.125] [bbotk]                    539 93.05796        0      0          102.395 
## INFO  [04:56:19.125] [bbotk]                                 uhash 
## INFO  [04:56:19.125] [bbotk]  2cf23859-575c-4bc2-a87f-0328524f3870 
## INFO  [04:56:19.209] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [04:56:19.211] [bbotk] Result: 
## INFO  [04:56:19.218] [bbotk]  regr.ranger.num.trees learner_param_vals  x_domain regr.mse 
## INFO  [04:56:19.218] [bbotk]                    539          <list[3]> <list[1]> 93.05796 
## INFO  [04:57:02.019] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 2/3) 
## INFO  [04:57:02.742] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [04:57:02.762] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:57:02.872] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:57:02.898] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:57:19.966] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:57:37.031] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:58:00.419] [mlr3] Finished benchmark 
## INFO  [04:58:00.719] [bbotk] Result of batch 1: 
## INFO  [04:58:00.732] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:58:00.732] [bbotk]            1.562222 98.23961        0      0           57.197 
## INFO  [04:58:00.732] [bbotk]                                 uhash 
## INFO  [04:58:00.732] [bbotk]  18f408e8-7349-4d76-ae8f-0f0ca7838c17 
## INFO  [04:58:00.740] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:58:01.103] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:58:01.227] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:58:16.544] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:58:32.435] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:58:46.364] [mlr3] Finished benchmark 
## INFO  [04:58:46.595] [bbotk] Result of batch 2: 
## INFO  [04:58:46.613] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:58:46.613] [bbotk]           0.2488889  98.2254        0      0           44.969 
## INFO  [04:58:46.613] [bbotk]                                 uhash 
## INFO  [04:58:46.613] [bbotk]  a335b023-87bd-497c-afb4-ae606d0f639a 
## INFO  [04:58:46.624] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:58:49.867] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:58:49.970] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [04:59:20.121] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [04:59:40.484] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [04:59:58.924] [mlr3] Finished benchmark 
## INFO  [04:59:59.315] [bbotk] Result of batch 3: 
## INFO  [04:59:59.331] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [04:59:59.331] [bbotk]           0.4677778 98.22634        0      0           68.614 
## INFO  [04:59:59.331] [bbotk]                                 uhash 
## INFO  [04:59:59.331] [bbotk]  66bf0b6f-0601-4759-9148-2a5e90c53372 
## INFO  [04:59:59.340] [bbotk] Evaluating 1 configuration(s) 
## INFO  [04:59:59.610] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [04:59:59.658] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:00:17.608] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:00:31.310] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:00:51.720] [mlr3] Finished benchmark 
## INFO  [05:00:52.205] [bbotk] Result of batch 4: 
## INFO  [05:00:52.222] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:00:52.222] [bbotk]            1.781111 98.24353        0      0           51.874 
## INFO  [05:00:52.222] [bbotk]                                 uhash 
## INFO  [05:00:52.222] [bbotk]  c7580234-4fbd-4d5d-9ea7-e758b0dc8097 
## INFO  [05:00:52.231] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:00:52.506] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:00:52.545] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:01:09.423] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:01:34.678] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:01:54.785] [mlr3] Finished benchmark 
## INFO  [05:01:55.258] [bbotk] Result of batch 5: 
## INFO  [05:01:55.272] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:01:55.272] [bbotk]            1.343333 98.23604        0      0           61.912 
## INFO  [05:01:55.272] [bbotk]                                 uhash 
## INFO  [05:01:55.272] [bbotk]  a6759be1-e5b2-42b9-a604-4a4b22310c75 
## INFO  [05:01:55.292] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:01:55.641] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:01:55.694] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:03:45.585] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:04:16.709] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:04:43.324] [mlr3] Finished benchmark 
## INFO  [05:04:44.135] [bbotk] Result of batch 6: 
## INFO  [05:04:44.200] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:04:44.200] [bbotk]                   2 98.24775        0      0          166.827 
## INFO  [05:04:44.200] [bbotk]                                 uhash 
## INFO  [05:04:44.200] [bbotk]  e5f1f380-8468-4812-ae6f-d4c833bb2bfa 
## INFO  [05:04:44.260] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:04:44.835] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:04:44.961] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:07:27.498] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:07:47.881] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:08:07.277] [mlr3] Finished benchmark 
## INFO  [05:08:07.657] [bbotk] Result of batch 7: 
## INFO  [05:08:07.673] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:08:07.673] [bbotk]                0.03 98.22522        0      0           201.62 
## INFO  [05:08:07.673] [bbotk]                                 uhash 
## INFO  [05:08:07.673] [bbotk]  f9756dd0-28da-4b29-9a5d-fe63298e1b67 
## INFO  [05:08:07.683] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:08:07.950] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:08:08.010] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:08:24.808] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:08:42.025] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:09:00.491] [mlr3] Finished benchmark 
## INFO  [05:09:00.879] [bbotk] Result of batch 8: 
## INFO  [05:09:00.894] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:09:00.894] [bbotk]           0.6866667 98.22795        0      0           52.195 
## INFO  [05:09:00.894] [bbotk]                                 uhash 
## INFO  [05:09:00.894] [bbotk]  5713da6a-207f-48c0-ac21-3a4ef9fcfb08 
## INFO  [05:09:00.899] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:09:01.164] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:09:01.211] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:09:19.069] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:09:34.881] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:09:48.686] [mlr3] Finished benchmark 
## INFO  [05:09:48.863] [bbotk] Result of batch 9: 
## INFO  [05:09:48.869] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:09:48.869] [bbotk]           0.9055556 98.23015        0      0           47.219 
## INFO  [05:09:48.869] [bbotk]                                 uhash 
## INFO  [05:09:48.869] [bbotk]  532a7faf-85e4-4052-be21-f3b07c9fa3df 
## INFO  [05:09:48.873] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:09:48.998] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:09:49.022] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:10:03.697] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:10:20.223] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:10:34.032] [mlr3] Finished benchmark 
## INFO  [05:10:34.476] [bbotk] Result of batch 10: 
## INFO  [05:10:34.486] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:10:34.486] [bbotk]            1.124444 98.23286        0      0           44.832 
## INFO  [05:10:34.486] [bbotk]                                 uhash 
## INFO  [05:10:34.486] [bbotk]  a90984fa-76d1-4b58-ad66-a6f22cf67bc6 
## INFO  [05:10:34.584] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [05:10:34.586] [bbotk] Result: 
## INFO  [05:10:34.597] [bbotk]  regr.glmnet.lambda learner_param_vals  x_domain regr.mse 
## INFO  [05:10:34.597] [bbotk]                0.03          <list[3]> <list[1]> 98.22522 
## INFO  [05:10:48.780] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 3/3) 
## INFO  [05:10:49.318] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [05:10:49.331] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:10:49.451] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:10:49.470] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:11:04.142] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:11:22.060] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:11:36.086] [mlr3] Finished benchmark 
## INFO  [05:11:36.337] [bbotk] Result of batch 1: 
## INFO  [05:11:36.348] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:11:36.348] [bbotk]           0.4677778 89.65833        0      0           46.346 
## INFO  [05:11:36.348] [bbotk]                                 uhash 
## INFO  [05:11:36.348] [bbotk]  8e15be5a-17b3-4821-af9a-88ddfe4aa967 
## INFO  [05:11:36.352] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:11:36.484] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:11:36.509] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:11:51.327] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:12:08.430] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:12:22.224] [mlr3] Finished benchmark 
## INFO  [05:12:22.429] [bbotk] Result of batch 2: 
## INFO  [05:12:22.438] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:12:22.438] [bbotk]            1.562222 89.67329        0      0           45.494 
## INFO  [05:12:22.438] [bbotk]                                 uhash 
## INFO  [05:12:22.438] [bbotk]  43b8b5b5-d3e2-4439-b98e-ee15be115694 
## INFO  [05:12:22.443] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:12:22.560] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:12:22.571] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:12:38.284] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:12:54.545] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:13:09.406] [mlr3] Finished benchmark 
## INFO  [05:13:09.630] [bbotk] Result of batch 3: 
## INFO  [05:13:09.639] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:13:09.639] [bbotk]           0.6866667 89.66019        0      0           46.695 
## INFO  [05:13:09.639] [bbotk]                                 uhash 
## INFO  [05:13:09.639] [bbotk]  14e4a2ed-3685-42a3-8e9e-bedabe9ef8b0 
## INFO  [05:13:09.645] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:13:09.765] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:13:09.782] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:13:28.831] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:13:45.223] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:13:58.363] [mlr3] Finished benchmark 
## INFO  [05:13:58.521] [bbotk] Result of batch 4: 
## INFO  [05:13:58.528] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:13:58.528] [bbotk]            1.343333 89.66931        0      0           48.406 
## INFO  [05:13:58.528] [bbotk]                                 uhash 
## INFO  [05:13:58.528] [bbotk]  74a9d11b-cee1-4361-b93f-564b784edb44 
## INFO  [05:13:58.534] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:13:58.632] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:13:58.644] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:14:12.913] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:14:29.827] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:14:43.602] [mlr3] Finished benchmark 
## INFO  [05:14:43.778] [bbotk] Result of batch 5: 
## INFO  [05:14:43.783] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:14:43.783] [bbotk]           0.9055556 89.66269        0      0           44.811 
## INFO  [05:14:43.783] [bbotk]                                 uhash 
## INFO  [05:14:43.783] [bbotk]  f8ac3ce4-d689-4fbb-bc2a-d306fb2886de 
## INFO  [05:14:43.786] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:14:43.921] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:14:43.937] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:15:02.657] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:15:16.527] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:15:31.662] [mlr3] Finished benchmark 
## INFO  [05:15:31.807] [bbotk] Result of batch 6: 
## INFO  [05:15:31.813] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:15:31.813] [bbotk]            1.124444 89.66575        0      0           47.525 
## INFO  [05:15:31.813] [bbotk]                                 uhash 
## INFO  [05:15:31.813] [bbotk]  ffb56033-50ea-4330-8106-bc4c8e42b71c 
## INFO  [05:15:31.817] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:15:31.900] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:15:31.919] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:15:48.216] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:16:02.117] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:16:19.574] [mlr3] Finished benchmark 
## INFO  [05:16:19.799] [bbotk] Result of batch 7: 
## INFO  [05:16:19.808] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:16:19.808] [bbotk]                0.03 89.65695        0      0           47.483 
## INFO  [05:16:19.808] [bbotk]                                 uhash 
## INFO  [05:16:19.808] [bbotk]  ff4b7aa4-e975-4be0-80bf-f86e979a8a60 
## INFO  [05:16:19.818] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:16:20.114] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:16:20.167] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:16:34.736] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:16:49.734] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:17:04.785] [mlr3] Finished benchmark 
## INFO  [05:17:05.078] [bbotk] Result of batch 8: 
## INFO  [05:17:05.089] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:17:05.089] [bbotk]           0.2488889 89.65722        0      0           44.415 
## INFO  [05:17:05.089] [bbotk]                                 uhash 
## INFO  [05:17:05.089] [bbotk]  48629a4f-4884-46f5-a623-499004f8c2d8 
## INFO  [05:17:05.095] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:17:05.244] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:17:05.256] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:17:21.816] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:17:36.777] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:17:52.340] [mlr3] Finished benchmark 
## INFO  [05:17:52.585] [bbotk] Result of batch 9: 
## INFO  [05:17:52.594] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:17:52.594] [bbotk]                   2 89.68232        0      0           46.855 
## INFO  [05:17:52.594] [bbotk]                                 uhash 
## INFO  [05:17:52.594] [bbotk]  61111ec1-bdb5-4793-9456-7c2abdb145cc 
## INFO  [05:17:52.601] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:17:52.938] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:17:53.032] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:18:06.766] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:18:20.638] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:18:42.146] [mlr3] Finished benchmark 
## INFO  [05:18:42.432] [bbotk] Result of batch 10: 
## INFO  [05:18:42.443] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:18:42.443] [bbotk]            1.781111 89.67764        0      0           48.953 
## INFO  [05:18:42.443] [bbotk]                                 uhash 
## INFO  [05:18:42.443] [bbotk]  0519da49-8068-4310-9343-9205dfc597c3 
## INFO  [05:18:42.506] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [05:18:42.509] [bbotk] Result: 
## INFO  [05:18:42.515] [bbotk]  regr.glmnet.lambda learner_param_vals  x_domain regr.mse 
## INFO  [05:18:42.515] [bbotk]                0.03          <list[3]> <list[1]> 89.65695 
## INFO  [05:19:04.191] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 1/3) 
## INFO  [05:19:05.212] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [05:19:05.219] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:19:05.294] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:19:05.320] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:19:31.978] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:20:27.209] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:20:54.844] [mlr3] Finished benchmark 
## INFO  [05:20:55.354] [bbotk] Result of batch 1: 
## INFO  [05:20:55.378] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:20:55.378] [bbotk]                0.03 95.76616        0      0          109.196 
## INFO  [05:20:55.378] [bbotk]                                 uhash 
## INFO  [05:20:55.378] [bbotk]  1ca3b836-a1f1-4cce-b9fe-cc51dcfbdf13 
## INFO  [05:20:55.385] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:20:55.628] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:20:55.647] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:21:10.493] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:21:27.367] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:21:47.672] [mlr3] Finished benchmark 
## INFO  [05:21:48.560] [bbotk] Result of batch 2: 
## INFO  [05:21:48.590] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:21:48.590] [bbotk]                   2 95.78784        0      0           51.551 
## INFO  [05:21:48.590] [bbotk]                                 uhash 
## INFO  [05:21:48.590] [bbotk]  8b40c5e0-8161-4a63-a9f9-b18887b2648c 
## INFO  [05:21:48.627] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:21:49.543] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:21:49.672] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:22:10.626] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:22:30.834] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:22:49.806] [mlr3] Finished benchmark 
## INFO  [05:22:50.037] [bbotk] Result of batch 3: 
## INFO  [05:22:50.048] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:22:50.048] [bbotk]            1.343333 95.77653        0      0           59.589 
## INFO  [05:22:50.048] [bbotk]                                 uhash 
## INFO  [05:22:50.048] [bbotk]  558cb2f2-b7bb-422e-8ac1-427e3e49bb18 
## INFO  [05:22:50.054] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:22:50.407] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:22:50.452] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:23:05.587] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:23:18.690] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:23:34.830] [mlr3] Finished benchmark 
## INFO  [05:23:35.250] [bbotk] Result of batch 4: 
## INFO  [05:23:35.267] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:23:35.267] [bbotk]           0.6866667 95.76875        0      0           44.195 
## INFO  [05:23:35.267] [bbotk]                                 uhash 
## INFO  [05:23:35.267] [bbotk]  44c44149-cddc-4825-b7d7-9d4527fc233a 
## INFO  [05:23:35.280] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:23:35.906] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:23:36.145] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:23:54.899] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:24:10.779] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:24:29.823] [mlr3] Finished benchmark 
## INFO  [05:24:30.204] [bbotk] Result of batch 5: 
## INFO  [05:24:30.223] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:24:30.223] [bbotk]           0.4677778  95.7672        0      0           53.317 
## INFO  [05:24:30.223] [bbotk]                                 uhash 
## INFO  [05:24:30.223] [bbotk]  2e53ea80-2c3d-4bc9-a847-3273f0a8d963 
## INFO  [05:24:30.241] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:24:30.682] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:24:30.773] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:24:51.202] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:25:07.990] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:25:23.056] [mlr3] Finished benchmark 
## INFO  [05:25:23.192] [bbotk] Result of batch 6: 
## INFO  [05:25:23.196] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:25:23.196] [bbotk]            1.124444 95.77348        0      0           51.945 
## INFO  [05:25:23.196] [bbotk]                                 uhash 
## INFO  [05:25:23.196] [bbotk]  96d932a6-0d7d-42bb-9d4f-2a5442f4952c 
## INFO  [05:25:23.198] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:25:23.302] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:25:23.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:25:38.360] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:25:52.365] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:26:07.488] [mlr3] Finished benchmark 
## INFO  [05:26:07.625] [bbotk] Result of batch 7: 
## INFO  [05:26:07.630] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:26:07.630] [bbotk]            1.781111 95.78376        0      0           43.994 
## INFO  [05:26:07.630] [bbotk]                                 uhash 
## INFO  [05:26:07.630] [bbotk]  45f0510d-f64c-497e-8239-09f680c09dd5 
## INFO  [05:26:07.633] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:26:07.719] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:26:07.740] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:26:44.946] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:26:59.837] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:27:17.904] [mlr3] Finished benchmark 
## INFO  [05:27:18.269] [bbotk] Result of batch 8: 
## INFO  [05:27:18.284] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:27:18.284] [bbotk]           0.9055556 95.77086        0      0           69.979 
## INFO  [05:27:18.284] [bbotk]                                 uhash 
## INFO  [05:27:18.284] [bbotk]  b3f0d7fd-a55b-4f2d-9cfb-01e3443067ef 
## INFO  [05:27:18.291] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:27:18.573] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:27:18.611] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:27:34.863] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:27:50.554] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:28:10.164] [mlr3] Finished benchmark 
## INFO  [05:28:10.575] [bbotk] Result of batch 9: 
## INFO  [05:28:10.587] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:28:10.587] [bbotk]           0.2488889 95.76631        0      0            51.38 
## INFO  [05:28:10.587] [bbotk]                                 uhash 
## INFO  [05:28:10.587] [bbotk]  eb30c4ca-6bee-4978-b5d1-cc32112d496d 
## INFO  [05:28:10.593] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:28:10.910] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:28:10.950] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3) 
## INFO  [05:28:26.604] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3) 
## INFO  [05:28:42.884] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3) 
## INFO  [05:28:57.596] [mlr3] Finished benchmark 
## INFO  [05:28:57.810] [bbotk] Result of batch 10: 
## INFO  [05:28:57.817] [bbotk]  regr.glmnet.lambda regr.mse warnings errors runtime_learners 
## INFO  [05:28:57.817] [bbotk]            1.562222 95.77998        0      0           46.518 
## INFO  [05:28:57.817] [bbotk]                                 uhash 
## INFO  [05:28:57.817] [bbotk]  a5491f00-f8a4-484e-8967-e9d3186d5d8e 
## INFO  [05:28:57.886] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [05:28:57.888] [bbotk] Result: 
## INFO  [05:28:57.895] [bbotk]  regr.glmnet.lambda learner_param_vals  x_domain regr.mse 
## INFO  [05:28:57.895] [bbotk]                0.03          <list[3]> <list[1]> 95.76616 
## INFO  [05:29:14.478] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 1/3) 
## INFO  [05:29:15.310] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]' 
## INFO  [05:29:15.332] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:29:15.492] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:29:15.537] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:29:32.794] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:29:46.953] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:30:03.771] [mlr3] Finished benchmark 
## INFO  [05:30:04.023] [bbotk] Result of batch 1: 
## INFO  [05:30:04.037] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:30:04.037] [bbotk]                     50 101.6354        0      0           48.047 
## INFO  [05:30:04.037] [bbotk]                                 uhash 
## INFO  [05:30:04.037] [bbotk]  2e7f84af-5545-4841-b21e-f1eba4afd333 
## INFO  [05:30:04.048] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:30:04.259] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:30:04.289] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:30:23.210] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:30:38.516] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:30:55.942] [mlr3] Finished benchmark 
## INFO  [05:30:56.208] [bbotk] Result of batch 2: 
## INFO  [05:30:56.216] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:30:56.216] [bbotk]                    111 100.8052        0      0           51.547 
## INFO  [05:30:56.216] [bbotk]                                 uhash 
## INFO  [05:30:56.216] [bbotk]  72c26eaa-3fc7-4953-b6d0-68b36c71c4b0 
## INFO  [05:30:56.223] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:30:56.388] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:30:56.411] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:31:17.292] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:31:37.714] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:31:59.639] [mlr3] Finished benchmark 
## INFO  [05:31:59.879] [bbotk] Result of batch 3: 
## INFO  [05:31:59.890] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:31:59.890] [bbotk]                    294  100.784        0      0           63.093 
## INFO  [05:31:59.890] [bbotk]                                 uhash 
## INFO  [05:31:59.890] [bbotk]  69f34ed5-f41f-448c-b50e-3bc0c5fefb81 
## INFO  [05:31:59.901] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:32:00.103] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:32:00.137] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:32:25.782] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:32:50.133] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:33:15.679] [mlr3] Finished benchmark 
## INFO  [05:33:15.781] [bbotk] Result of batch 4: 
## INFO  [05:33:15.785] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:33:15.785] [bbotk]                    539 100.8647        0      0           75.435 
## INFO  [05:33:15.785] [bbotk]                                 uhash 
## INFO  [05:33:15.785] [bbotk]  d9ee3dca-ea30-486a-ba47-862ad1ed8738 
## INFO  [05:33:15.788] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:33:15.910] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:33:15.927] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:33:37.756] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:34:00.712] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:34:22.605] [mlr3] Finished benchmark 
## INFO  [05:34:22.694] [bbotk] Result of batch 5: 
## INFO  [05:34:22.699] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:34:22.699] [bbotk]                    417  100.505        0      0           66.551 
## INFO  [05:34:22.699] [bbotk]                                 uhash 
## INFO  [05:34:22.699] [bbotk]  f8d08cb0-8bd3-4442-844e-9826835e1433 
## INFO  [05:34:22.702] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:34:22.821] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:34:22.838] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:34:44.174] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:35:04.689] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:35:26.417] [mlr3] Finished benchmark 
## INFO  [05:35:26.636] [bbotk] Result of batch 6: 
## INFO  [05:35:26.643] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:35:26.643] [bbotk]                    356 100.5993        0      0             63.5 
## INFO  [05:35:26.643] [bbotk]                                 uhash 
## INFO  [05:35:26.643] [bbotk]  2c5ff1a9-a22b-430c-9b16-bceed6c0cffc 
## INFO  [05:35:26.648] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:35:26.817] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:35:26.846] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:35:50.178] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:36:13.301] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:36:37.225] [mlr3] Finished benchmark 
## INFO  [05:36:37.341] [bbotk] Result of batch 7: 
## INFO  [05:36:37.348] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:36:37.348] [bbotk]                    478 100.8171        0      0           70.293 
## INFO  [05:36:37.348] [bbotk]                                 uhash 
## INFO  [05:36:37.348] [bbotk]  31b8b2e3-9fa5-4173-b44d-5b7c41ef46ea 
## INFO  [05:36:37.353] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:36:37.481] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:36:37.503] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:36:54.885] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:37:10.624] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:37:29.015] [mlr3] Finished benchmark 
## INFO  [05:37:29.190] [bbotk] Result of batch 8: 
## INFO  [05:37:29.197] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:37:29.197] [bbotk]                    172 100.5946        0      0           51.418 
## INFO  [05:37:29.197] [bbotk]                                 uhash 
## INFO  [05:37:29.197] [bbotk]  103cdefa-9d5c-41bf-8f15-7118b32db48c 
## INFO  [05:37:29.202] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:37:29.364] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:37:29.385] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:37:46.916] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:38:05.774] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:38:24.240] [mlr3] Finished benchmark 
## INFO  [05:38:24.305] [bbotk] Result of batch 9: 
## INFO  [05:38:24.307] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:38:24.307] [bbotk]                    233 100.9425        0      0           54.744 
## INFO  [05:38:24.307] [bbotk]                                 uhash 
## INFO  [05:38:24.307] [bbotk]  3eb3b18c-5c06-4d9c-8f1b-3ea2555324f6 
## INFO  [05:38:24.309] [bbotk] Evaluating 1 configuration(s) 
## INFO  [05:38:24.354] [mlr3] Running benchmark with 3 resampling iterations 
## INFO  [05:38:24.366] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3) 
## INFO  [05:38:50.036] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3) 
## INFO  [05:39:16.375] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3) 
## INFO  [05:39:44.380] [mlr3] Finished benchmark 
## INFO  [05:39:44.458] [bbotk] Result of batch 10: 
## INFO  [05:39:44.462] [bbotk]  regr.ranger.num.trees regr.mse warnings errors runtime_learners 
## INFO  [05:39:44.462] [bbotk]                    600 100.9782        0      0           79.936 
## INFO  [05:39:44.462] [bbotk]                                 uhash 
## INFO  [05:39:44.462] [bbotk]  7129e28c-6760-4d24-80a2-11c73f84b2fb 
## INFO  [05:39:44.476] [bbotk] Finished optimizing after 10 evaluation(s) 
## INFO  [05:39:44.477] [bbotk] Result: 
## INFO  [05:39:44.480] [bbotk]  regr.ranger.num.trees learner_param_vals  x_domain regr.mse 
## INFO  [05:39:44.480] [bbotk]                    417          <list[3]> <list[1]>  100.505 
## INFO  [05:40:14.507] [mlr3] Finished benchmark
# Plot the benchmark result with a black-and-white theme; the x-axis labels
# are rotated 45 degrees and right-aligned so the long learner ids stay legible.
rotated_x_labels <- theme(axis.text.x = element_text(angle = 45, hjust = 1))
bmr_plot <- autoplot(bmr) + theme_bw() + rotated_x_labels
bmr_plot

# Aggregate the score of each learner in the benchmark for `measure`
# (one row per learner in the printed table).
bmr$aggregate(measure)
##    nr      resample_result task_id                         learner_id
## 1:  1 <ResampleResult[22]>   Delay                 imputemean.regr.lm
## 2:  2 <ResampleResult[22]>   Delay       scale.imputemean.regr.glmnet
## 3:  3 <ResampleResult[22]>   Delay scale.imputemean.regr.glmnet.tuned
## 4:  4 <ResampleResult[22]>   Delay scale.imputemean.regr.ranger.tuned
##    resampling_id iters regr.mse
## 1:            cv     3 94.54240
## 2:            cv     3 94.54240
## 3:            cv     3 94.54238
## 4:            cv     3 99.45913
# Results recorded from a different run (they differ from the rendered output above):
#nr      resample_result task_id
#1:  1 <ResampleResult[22]>   Delay
#2:  2 <ResampleResult[22]>   Delay
#3:  3 <ResampleResult[22]>   Delay
#4:  4 <ResampleResult[22]>   Delay
#learner_id resampling_id iters
#1:                 imputemean.regr.lm            cv     3
#2:       scale.imputemean.regr.glmnet            cv     3
#3: scale.imputemean.regr.glmnet.tuned            cv     3
#4: scale.imputemean.regr.ranger.tuned            cv     3
#regr.mse
#1: 68.32703
#2: 68.32712
#3: 68.32669
#4: 31.95522

Model conclusion:

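The conclusion depends on which run is considered. In the rendered output above, the tuned random forest has the highest (worst) MSE at 99.459, while the linear model and both glmnet models are essentially tied at an MSE of about 94.542. In the run recorded in the commented-out results, Random Forests performed the best over the other regression models with an MSE of 31.955 (versus about 68.327 for the other three models).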